diff --git a/README.md b/README.md
index dbe559eed9ef9a1990748b7c032ea54266233949..2fdd1cc9760f1785ca781b585dd797a31820e0c1 100644
--- a/README.md
+++ b/README.md
@@ -13,15 +13,10 @@ OneFlow 相关的模型转换工具
 
 #### 简介
 
-oneflow_onnx 工具包含两个功能,一个是将 OneFlow 导出 ONNX,另外一个是将各个训练框架导出的 ONNX 模型转换为 OneFlow 的模型。本工程已经适配了 TensorFlow/Pytorch/PaddlePaddle 框架的预训练模型通过导出 ONNX 转换为 OneFlow(我们将这一功能叫作 X2OneFlow)。
 - OneFlow2ONNX 模型支持,支持 OneFlow 静态图模型转为 ONNX,可转换由 [flow.checkpoint.save ](https://docs.oneflow.org/basics_topics/model_load_save.html) 方法保存下来的 OneFlow 模型,详情可以参考 [OneFlow2ONNX 模型列表](docs/oneflow2onnx/oneflow2onnx_model_zoo.md)。
-- X2OneFlow 模型支持,支持将 TensorFlow/Pytorch/PaddlePaddle 的模型通过 ONNX 转换为 OneFlow 的模型,详情可以参考 [X2OneFlow 模型列表](docs/x2oneflow/x2oneflow_model_zoo.md)。
 - OneFlow2ONNX 算子支持,目前稳定支持导出 ONNX Opset10,部分 OneFlow 算子支持更低的 ONNX Opset 转换,详情可以参考 [OneFlow2ONNX 算子列表](docs/oneflow2onnx/op_list.md)。
-- X2OneFlow 算子支持,目前稳定支持 TensorFlow/Pytorch/PaddlePaddle 中涵盖大部分 CV 场景的算子,详情可以参考 [X2OneFlow 算子列表](docs/x2oneflow/op_list.md)。
-- 代码生成支持,支持将 TensorFlow/Pytorch/PaddlePaddle 的模型通过 ONNX 转换为 OneFlow 的模型并同时生成 OneFlow 的代码,详情可以参考 [X2OneFlow 代码生成模型列表](docs/x2oneflow/code_gen.md)。
-> 目前 OneFlow2ONNX 支持80+的 OneFlow OP 导出为 ONNX OP。X2OneFlow 支持80个 ONNX OP,50+个 TensorFlow OP,80+个 Pytorch OP,50+个 PaddlePaddle OP,覆盖了大部分 CV 分类模型常用的操作。注意我们支持的 OP 和模型均为动态图 API 下的 OP 和模型,要求 PaddlePaddle 的版本>=2.0.0,TensorFlow >=2.0.0,Pytorch 无明确版本要求。目前 X2OneFlow 已经成功转换了50+个 TensorFlow/Pytorch/PaddlePaddle 官方模型。欢迎体验此项目。
 
 #### 环境依赖
 
@@ -33,18 +28,7 @@ onnx>=1.8.0
 onnx-simplifier>=0.3.3
 onnxoptimizer>=0.2.5
 onnxruntime>=1.6.0
-oneflow (https://github.com/Oneflow-Inc/oneflow#install-with-pip-package)
-```
-
-
-如果你想使用 X2OneFlow(X 代表 TensorFlow/Pytorch/PaddlePaddle)则需要安装对应的深度学习框架。依赖如下:
-
-```sh
-pytorch>=1.7.0
-paddlepaddle>=2.0.0
-paddle2onnx>=0.6
-tensorflow>=2.0.0
-tf2onnx>=1.8.4
+oneflow>=0.5.0
 ```
 
 #### 安装
@@ -70,35 +54,9 @@ python3 setup.py install
 
 #### 相关文档
 
 - [OneFlow2ONNX模型列表](docs/oneflow2onnx/oneflow2onnx_model_zoo.md)
-- [X2OneFlow模型列表](docs/x2oneflow/x2oneflow_model_zoo.md)
 - [OneFlow2ONNX算子列表](docs/oneflow2onnx/op_list.md)
-- [X2OneFlow算子列表](docs/x2oneflow/op_list.md)
 - [使用示例](examples/README.md)
 
-### nchw2nhwc_tool
-
-#### 简介
-
-本工具的功能是将 OneFlow 训练的 NCHW 排布的权重转换为 NHWC 排布,使用方法[在这里](nchw2nhwc_tool/README.md)
-
-
-### save_serving_tool
-
-#### 简介
-本工具的目的是将 OneFlow 训练的模型转换为 Serving 端可用的模型,使用方法[在这里](save_serving_tool/README.md)
-
-
-### 项目进展
-
-
-- 2021/4/13 支持ResNet18代码自动生成,量化OP转换失败暂时移除c测试脚本,发布0.2.2 wheel包。
-- 2021/4/14 修复CI错误,支持X2OneFlow的所有模型自动代码生成功能,发布0.2.3 whell包。
-- 2020/4/15 完成X2OneFlow所有模型的自动代码生成功能,发布0.3.0 whell包。
-- 2020/4/16 将Expand OP并入主分支,并修复导入oneflow_api报错的bug,发布0.3.1 whell包。
-- 2020/4/16 解决自动代码生成遗留问题,并将自动代码生成的测试加入CI,发布0.3.2 whell包。
-- 2020/6/21 导出ONNX新增PreLU/LeakyReLU OP,修复自动代码生成bug,发布0.3.3 whell包。
-- 2020/6/23 导出ONNX新增Constant OP,修复BN只有NC两个维度(InsightFace)导出的bug以及禁用导出ONNX时默认开启的global function,发布0.3.3.20210623 whell包。
-- 2020/6/24 导出ONNX新增Flatten OP,发布0.3.4 whell包。
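Editor's note (not part of the patch): after these removals, the export path that remains follows the pattern used by the rewritten tests further down (`test_flatten.py`, `test_matmul.py`): wrap an eager `flow.nn.Module` in a `flow.nn.Graph`, trace it once, save the weights with `flow.save`, then call `convert_to_onnx_and_check`. A minimal sketch, assuming `oneflow>=0.5.0` as required above; the `Linear` module, the graph class name, and the input shape are illustrative, not taken from the patch:

```python
import tempfile

import oneflow as flow
from oneflow_onnx.oneflow2onnx.util import convert_to_onnx_and_check


class ExportGraph(flow.nn.Graph):
    """Static-graph wrapper around an arbitrary eager module."""

    def __init__(self, module):
        super().__init__()
        self.m = module

    def build(self, x):
        return self.m(x)


model = flow.nn.Linear(20, 30)     # illustrative; any flow.nn.Module works
graph = ExportGraph(model)
graph._compile(flow.randn(1, 20))  # trace the graph with a dummy input

with tempfile.TemporaryDirectory() as tmpdir:
    flow.save(model.state_dict(), tmpdir)  # weights the converter reads back
    convert_to_onnx_and_check(graph, flow_weight_dir=tmpdir, onnx_model_path="/tmp")
```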
diff --git a/ci/test.sh b/ci/test.sh
index 9b93d423c572abc4d03cc5de926ac6d75420181a..7a15eb5d60d5e09c955d7160e3c80a8bde1d1c1f 100755
--- a/ci/test.sh
+++ b/ci/test.sh
@@ -4,39 +4,5 @@ python3 -m pip install --user --upgrade pip
 python3 -m pip install -r test-requirements.txt --user --extra-index-url https://pypi.ngc.nvidia.com
 if [ -f requirements.txt ]; then python3 -m pip install -r requirements.txt --user; fi
 python3 -m pip install oneflow --user -U -f https://staging.oneflow.info/branch/master/cu110
-python3 -m pip install gast==0.3.3 --user
 python3 setup.py install
-python3 examples/tensorrt_qat/test_lenet_qat_train.py
-python3 -m pytest -s examples/tensorrt_qat/test_lenet_qat.py
-python3 examples/tensorrt_qat/test_mobilenet_qat_train.py
-python3 -m pytest -s examples/tensorrt_qat/test_mobilenet_qat.py
 python3 -m pytest examples/oneflow2onnx
-python3 -m pytest examples/x2oneflow/pytorch2oneflow/nodes
-python3 -m pytest examples/x2oneflow/pytorch2oneflow/models
-python3 -m pytest examples/x2oneflow/tensorflow2oneflow/nodes
-python3 -m pytest examples/x2oneflow/tensorflow2oneflow/models
-python3 -m pytest examples/x2oneflow/paddle2oneflow/nodes
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_alexnet.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_darknet.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_densenet.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_dpn.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_efficientnet.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_ghostnet.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_googlenet.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_inceptionv3.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_inceptionv4.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_mobilenetv1.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_mobilenetv2.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_mobilenetv3.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_regnet.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_repvgg.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_res2net.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_resnet.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_resnext.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_se_resnext.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_shufflenet_v2.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_squeezenet.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_vggnet.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_vision_transformer.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_xception_deeplab.py
-python3 -m pytest examples/x2oneflow/paddle2oneflow/models/test_xception.py
diff --git a/docs/README_en.md b/docs/README_en.md
index eb61908b0aae757e405944763b2729dbd0985314..41982994d426eab91e99453dffa8b7712c9b3f9c 100644
--- a/docs/README_en.md
+++ b/docs/README_en.md
@@ -4,10 +4,3 @@
 
 1. op_list.md: supported OP in the transformation process from OneFlow to ONNX.
 2. oneflow2onnx_model_zoo.md: tested model list.
-
-# x2oneflow
-
-1. op_list.md: transform OP from other frameworks into OneFlow's format
-2. x2oneflow_model_zoo.md: tested model list.
-3. code_gen.md: supported model list for OneFlow code generation .
-
diff --git a/docs/x2oneflow/code_gen.md b/docs/x2oneflow/code_gen.md
deleted file mode 100644
index 0c370abf8a88e9d3092045c9f919bfea3cfb6ad3..0000000000000000000000000000000000000000
--- a/docs/x2oneflow/code_gen.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# X2OneFlow 代码生成
-
-> 这里记录在X2OneFlow中生成OneFlow代码的支持情况
-
-## Pytorch
-
-| 模型 | 是否支持 |
-| ------------ | -------- |
-| LeNet | Yes |
-| AlexNet | Yes |
-| VGGNet | Yes |
-| GoogleNet | Yes |
-| ResNet | Yes |
-| PreActResNet | Yes |
-| ResNext | Yes |
-| SENet | Yes |
-| MobileNetV1 | Yes |
-| MobileNetV2 | Yes |
-| MobileNetV3 | Yes |
-| RegNet | Yes |
-| DenseNet | Yes |
-| EfficientNet | Yes |
-| InceptionNet | Yes |
-| ShuffleNetV1 | Yes |
-| ShuffleNetV2 | Yes |
-| SqueezeNet | Yes |
-| DPN | Yes |
-| PNASNet | Yes |
-| DLANet | Yes |
-
-## TensorFlow
-
-| 模型 | 是否支持 |
-| ------------ | -------- |
-| VGGNet | Yes |
-| ResNet | Yes |
-| ResNetV2 | Yes |
-| XceptionNet | Yes |
-| MobileNetV1 | Yes |
-| MobileNetV2 | Yes |
-| MobileNetV3 | Yes |
-| DenseNet | Yes |
-| EfficientNet | Yes |
-| InceptionNet | Yes |
-
-## PaddlePaddle
-
-| 模型 | 是否支持 |
-| ------------------ | -------- |
-| AlexNet | Yes |
-| VGGNet | Yes |
-| GoogleNet | Yes |
-| ResNet | Yes |
-| ResNext | Yes |
-| SE_ResNext | Yes |
-| SENet | Yes |
-| MobileNetV1 | Yes |
-| MobileNetV2 | Yes |
-| MobileNetV3 | Yes |
-| RegNet | Yes |
-| DenseNet | Yes |
-| EfficientNet | Yes |
-| InceptionNet | Yes |
-| ShuffleNetV2 | Yes |
-| SqueezeNet | Yes |
-| DPNNet | Yes |
-| DarkNet | Yes |
-| GhostNet | Yes |
-| RepVGG | Yes |
-| XceptionNet | Yes |
-| Xception_DeepLab | Yes |
-| Vision_Transformer | Yes |
-| Res2Net | Yes |
\ No newline at end of file
diff --git a/docs/x2oneflow/op_list.md b/docs/x2oneflow/op_list.md
deleted file mode 100644
index e1437b36631bab2750cdc2a7aa016f86b505c133..0000000000000000000000000000000000000000
--- a/docs/x2oneflow/op_list.md
+++ /dev/null
@@ -1,115 +0,0 @@
-# X2OneFlow 支持的OP列表
-
-> 目前X2OneFlow 支持80个ONNX OP,50+个TensorFlow OP,80+个Pytorch OP,50+个PaddlePaddle OP,覆盖了大部分CV分类模型常用的操作。注意我们支持的OP和模型均为动态图API下的OP和模型,要求PaddlePaddle的版本>=2.0.0,TensorFlow>=2.0.0,Pytorch无明确版本要求。
-
-**注:** 目前,部分OP暂未支持,如您在转换过程中出现OP不支持的情况,可自行添加或反馈给我们。欢迎通过[ISSUE](https://github.com/Oneflow-Inc/oneflow_convert_tools/issues/new)反馈的方式告知我们(模型名,代码实现或模型获取方式),我们会及时跟进:)
-
-
-
-## ONNX
-
-| 序号 | OP | 序号 | OP | 序号 | OP | 序号 | OP |
-|------|------|------|------|------|------|------|------|
-| 1 | Conv | 2 |BatchNormalization| 3 | MaxPool | 4 | AveragePool|
-| 5 | Concat | 6 | ReLU | 7 |AdaptiveMaxPool| 8 | Softmax |
-| 9 | Unsqueeze | 10 | Transpose | 11 | Clip | 12 | Gather |
-| 13 | Slice | 14 | Split | 15 | Flatten | 16 | Add |
-| 17 | Sub | 18 | Mul | 19 | Div | 20 |Sqrt |
-| 21 |Pow | 22 | Tanh | 23 | Sigmoid | 24 | Cast |
-| 25 | Pad | 26 | ReduceMean | 27 | Reshape | 28 | AdaptiveAvgPool|
-|29 | Squeeze | 30 | Expand | 31 | Gather | 32 | Slice |
-|33 | Split | 34 | Min | 35 | Max | 36 | Constant |
-|37 | HardSigmoid| 38 | Gemm | 39 | MatMul | 40 | Erf |
-|41 | ~~Cast~~ | 42 | GlobalMaxPool | 43 | GlobalAveragePool |44|ReduceMax|
-|45 | Identity | 46 | Rsqrt | 47 | LeakyRelu | 48 | Abs |
-|49 | Exp | 50 | Reciprocal | 51 | Floor | 52 | ArgMax |
-|53 | Range | 54 | Greator | 55 | Less | 56 | Softplus |
-|57 | Neg | 58 | Ceil | 59 | Where | 60 | Equal |
-|61 | Sign | 62 | NonZero | 63 | Acos | 64 | Acosh |
-|65 | ArgMin | 66 | Asin | 67 | Atan | 68 | Cos |
-|69 | Elu | 70 | Exp | 71 | Log | 72 | LogSoftmax|
-|73 |ReduceLogSumExp|74| ReduceMin | 75 | ReduceProd | 76 | Round |
-|77 | Sin | 78 | Tanh | 79 |Tan | 80 | PReLU |
-
-
-## TensorFlow
-
-
-| 序号 | OP | 序号 | OP | 序号 | OP | 序号 | OP |
-|------|------|------|------|------|------|------|------|
-| 1 | Relu | 2 | Relu6 | 3 | Shape | 4 | Abs |
-| 5 | Sigmoid | 6 | Exp | 7 | Rsqrt | 8 | Swish |
-| 9 | Tanh | 10 | LeakyRelu | 11 | Add | 12 | Greater |
-| 13 | Sub | 14 | Maximum | 15 | Mul | 16 | FloorDiv |
-| 17 | Pow | 18 | Const | 19 | Transpose | 20 | BatchNormalization |
-| 21 | Conv2D | 22 | BiasAdd | 23 | MaxPool | 24 | DepthwiseConv2D |
-| 25 | Reshape | 26 | AvgPool | 27 | Where | 28 | SquaredDifference |
-| 29 | Neg | 30 | Ceil | 31 | Pad | 32 | ~~ResizeBilinear~~ |
-| 33 | ReduceMean | 34 | MatMul | 35 | ArgMax | 36 | ExpandDims |
-| 37 | Slice | 38 | Sum | 39 | Max | 40 | ~~LessEqual~~ |
-| 41 | ~~Cast~~ | 42 | Split | 43 | Squeeze | 44 | ~~ResizeNearestNeighbor~~ |
-| 45 | Softmax | 46 | Range | 47 | Size | 48 | Sqrt |
-| 49 | Identity | 50 |~~GreaterEqual~~| 51 | Equal | 52 | Minimum |
-| 53 | | 54 | Fill | 55 | Floor | 56 | |
-| 57 | Sqrt | 58 | Softplus | 59 | Erf | 60 | |
-
-
-
-## Pytorch
-
-| 序号 | OP | 序号 | OP | 序号 | OP | 序号 | OP |
-|------|------|------|------|------|------|------|------|
-| 1 | BatchNorm | 2 | ConstantPad2d | 3 | Conv2D | 4 | Dropout |
-| 5 | MaxPool2d | 6 | adaptive_avg_pool2d | 7 | adaptive_max_pool2d | 8 | AvgPool2d |
-| 9 | abs | 10| absolute | 11| acos | 12 | add |
-| 13| addmm | 14| arange | 15| argmax | 16 | argmin |
-| 17| asin | 18| atan | 19| baddbmm | 20 | cat |
-| 21| ceil | 22| ~~celu~~ | 23| clamp | 24 | clamp_max|
-| 25| clamp_min | 26| concat | 27| cos | 28 | ~~cumsum~~|
-| 29| div | 30| elu | 31| eq | 32 | erf |
-| 33| exp | 34| ~~expand~~ | 35| flatten | 36 | floor |
-| 37|floor_divide|38| full | 39| full_like | 40 | gather |
-| 41| ~~ge~~ | 42| gelu | 43| ~~GroupNorm~~ | 44 |~~hardswish~~|
-| 45| hardtanh | 46| ~~instance_norm~~ | 47| ~~interpolate~~ | 48 | ~~layer_norm~~|
-| 49| leaky_relu| 50| log | 51| log1p | 52 | log2 |
-| 53| log_softmax|54| logsumexp | 55| max | 56 | min |
-| 57| mean |58 | mm | 59| mul | 60 | neg |
-| 61| ~~norm~~ | 62| ~~pixel_shuffle~~ | 63| pow | 64 | permute |
-| 65| ~~prelu~~ | 66| relu | 67| reshape | 68 | relu6 |
-| 69| softmax | 70| slice | 71| sub | 72 | sqrt |
-| 73| sigmoid | 74| prod | 75| reshape_as | 76 | round |
-| 77| rsqrt | 78| ~~selu~~ | 79| sign | 80 | sin |
-| 81| softplus | 82| split | 83| squeeze | 84 | sum |
-| 85| tan | 86| tanh | 87 | transpose | 88 | unsqueeze|
-| 89| ~~upsample_nearest2d~~ | 90| prelu |
-
-- hardswish pytorch导出存在bug
-- interpolate oneflow和pytorch的参数列表未完全对齐,只能转nearest和align_corners=False的情况,working
-
-
-## PaddlePaddle
-
-| 序号 | OP | 序号 | OP | 序号 | OP | 序号 | OP |
-|------|------|------|------|------|------|------|------|
-| 1 | abs | 2 | acos | 3 | add | 4 | argmax |
-| 5 | batch_norm| 6 | ~~bilinear_interp~~| 7 | bmm | 8 | ~~cast~~ |
-| 9 | clip | 10| concat | 11| conv2d | 12| ~~conv2d_transpose~~|
-| 13| ~~cumsum~~| 14| depthwise_conv2d | 15| dropout | 16| elementwise_add|
-| 17| elementwise_div| 18| elementwise_mul | 19| elementwise_min | 20| elementwise_max|
-| 21| elementwise_pow| 22| elementwise_sub | 23| exp | 24| expand_as |
-| 25| expand_dims|26| flatten | 27| floor | 28| gather |
-| 29| hardsigmoid|30| hardswish | 31| leaky_relu| 32| log |
-| 33| matmul | 34| mean | 35| mul | 36| ~~nearest_interp~~|
-| 37| pad2d | 38| pow | 39| ~~prelu~~ | 40| reduce_mean|
-| 41| reduce_max| 42| reduce_min | 43| reduce_prod|44| reduce_sum |
-| 45| relu | 46| relu6 | 47| reshape | 48| softmax |
-| 49| sigmoid | 50| slice | 51| scale | 52| ~~split~ |
-| 53| squeeze | 54| sqrt | 55| square | 56| stack |
-| 57| stride_slice|58| sum | 59| swish | 60| tanh |
-| 61| transpose | 62| unsqueeze|
-
-
-相关issue:
-
-- https://github.com/PaddlePaddle/Paddle2ONNX/issues/221
-- https://github.com/PaddlePaddle/Paddle2ONNX/issues/220
diff --git a/docs/x2oneflow/x2oneflow_model_zoo.md b/docs/x2oneflow/x2oneflow_model_zoo.md
deleted file mode 100644
index 254aceec06100101225f4d9520aac1cfe1286116..0000000000000000000000000000000000000000
--- a/docs/x2oneflow/x2oneflow_model_zoo.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# X2OneFlow模型测试库
-
-> 目前X2OneFlow 支持80个ONNX OP,50+个TensorFlow OP,80+个Pytorch OP,50+个PaddlePaddle OP,覆盖了大部分CV分类模型常用的操作。注意我们支持的OP和模型均为动态图API下的OP和模型,要求PaddlePaddle的版本>=2.0.0,TensorFlow>=2.0.0,Pytorch无明确版本要求。我们在如下模型列表中测试了X2OneFlow的转换。
-
-## Pytorch
-
-| 模型 | 是否支持 |
-| ------------ | -------- |
-| LeNet | Yes |
-| AlexNet | Yes |
-| VGGNet | Yes |
-| GoogleNet | Yes |
-| ResNet | Yes |
-| PreActResNet | Yes |
-| ResNext | Yes |
-| SENet | Yes |
-| MobileNetV1 | Yes |
-| MobileNetV2 | Yes |
-| MobileNetV3 | Yes |
-| RegNet | Yes |
-| DenseNet | Yes |
-| EfficientNet | Yes |
-| InceptionNet | Yes |
-| ShuffleNetV1 | Yes |
-| ShuffleNetV2 | Yes |
-| SqueezeNet | Yes |
-| DPN | Yes |
-| PNASNet | Yes |
-| DLANet | Yes |
-
-## TensorFlow
-
-| 模型 | 是否支持 |
-| ------------ | -------- |
-| VGGNet | Yes |
-| ResNet | Yes |
-| ResNetV2 | Yes |
-| XceptionNet | Yes |
-| MobileNetV1 | Yes |
-| MobileNetV2 | Yes |
-| MobileNetV3 | Yes |
-| DenseNet | Yes |
-| EfficientNet | Yes |
-| InceptionNet | Yes |
-
-## PaddlePaddle
-
-| 模型 | 是否支持 |
-| ------------------ | -------- |
-| AlexNet | Yes |
-| VGGNet | Yes |
-| GoogleNet | Yes |
-| ResNet | Yes |
-| ResNext | Yes |
-| SE_ResNext | Yes |
-| SENet | Yes |
-| MobileNetV1 | Yes |
-| MobileNetV2 | Yes |
-| MobileNetV3 | Yes |
-| RegNet | Yes |
-| DenseNet | Yes |
-| EfficientNet | Yes |
-| InceptionNet | Yes |
-| ShuffleNetV2 | Yes |
-| SqueezeNet | Yes |
-| DPNNet | Yes |
-| DarkNet | Yes |
-| GhostNet | Yes |
-| RepVGG | Yes |
-| XceptionNet | Yes |
-| Xception_DeepLab | Yes |
-| Vision_Transformer | Yes |
-| Res2Net | Yes |
-
-
-- 模型的测试代码均可以在本工程的examples中找到
\ No newline at end of file
diff --git a/examples/README.md b/examples/README.md
index 84063aff12899b2795786f76e827a9fed2a20232..fac33f66f4fc5a779dffc8a01d18590e79ade0e9 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -2,4 +2,4 @@
 
 ## oneflow_onnx samples
 
-At present, oneflow2onnx supports the export of 80 + ONEFLOW OP to onnx Op. X2oneflow supports 80+ onnx OP's, 50 + TensorFlow OP's, 80 + Pytorch OP's and 50 + PaddlePaddle OP's, covering most common operations of CV classification model. Note that the OP and model we support are all OP and model under the dynamic graph API, which requires that the version of PaddlePaddle > = 2.0.0, TensorFlow > = 2.0.0, and there is no explicit version requirement for Pytorch. At present, x2oneflow has successfully converted 50 + TensorFlow/Pytorch/PaddlePaddle official models. We provide all the test examples in the 'oneflow2onnx' and 'x2oneflow' folders respectively. If you want to export onnx or convert the custom TensorFlow/Pytorch/PaddlePaddle network, you can modify it accordingly.
\ No newline at end of file
+At present, oneflow2onnx supports exporting 80+ OneFlow OPs to ONNX OPs.
\ No newline at end of file
diff --git a/examples/README_zh.md b/examples/README_zh.md
index 559d20e3ebbb2c1cc0f86f5ff2d38bb9bc252c5e..177f09bd339c56ea9ed92426ac551a2e59e65548 100644
--- a/examples/README_zh.md
+++ b/examples/README_zh.md
@@ -1,5 +1,4 @@
 ## oneflow_onnx 使用示例
 
-目前OneFlow2ONNX 支持80+的OneFlow OP导出为ONNX OP。X2OneFlow支持80+个ONNX OP,50+个TensorFlow OP,80+个Pytorch OP,50+个PaddlePaddle OP,覆盖了大部分CV分类模型常用的操作。注意我们支持的OP和模型均为动态图API下的OP和模型,要求PaddlePaddle的版本>=2.0.0,TensorFlow>=2.0.0,Pytorch无明确版本要求。目前X2OneFlow已经成功转换了50+个TensorFlow/Pytorch/PaddlePaddle官方模型.我们分别在`oneflow2onnx`和`x2oneflow`文件夹下提供了所有的测试示例,需导出ONNX或者转换自定义的TensorFlow/Pytorch/PaddlePaddle网络可以对应修改使用。
-
+目前OneFlow2ONNX 支持80+的OneFlow OP导出为ONNX OP。
diff --git a/examples/oneflow2onnx/nodes/test_fake_quantization.py b/examples/oneflow2onnx/nodes/test_fake_quantization.py
deleted file mode 100644
index 739eb88e2f6fe0c91319920b81ee0f059a31126e..0000000000000000000000000000000000000000
--- a/examples/oneflow2onnx/nodes/test_fake_quantization.py
+++ /dev/null
@@ -1,143 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import numpy as np
-import oneflow as flow
-from oneflow_onnx.oneflow2onnx.util import convert_to_onnx_and_check
-
-
-def set_moving_max_min_value():
-    max_key, min_key = None, None
-    keys = flow.get_all_variables().keys()
-    for key in keys:
-        if max_key is not None and min_key is not None:
-            break
-        if key[-3:] == "max":
-            max_key = key
-        if key[-3:] == "min":
-            min_key = key
-    if max_key is not None and min_key is not None:
-        flow.load_variables(
-            {
-                max_key: np.array([0.5]).astype(np.float32),
-                min_key: np.array([-0.2]).astype(np.float32),
-            }
-        )
-
-
-def generate_fake_quantization_test(
-    per_layer: bool = True, scheme: str = "symmetric", device_type: str = "cpu",
-):
-    @flow.global_function()
-    def fake_quantization():
-        with flow.scope.placement(device_type, "0:0"):
-            x = flow.get_variable(
-                name="x1",
-                shape=(2, 3, 4),
-                dtype=flow.float,
-                initializer=flow.random_uniform_initializer(-10, 10),
-            )
-            return flow.quantization.fake_quantization(
-                x,
-                *flow.quantization.min_max_observer(
-                    x, per_layer_quantization=per_layer, quantization_scheme=scheme,
-                ),
-                quantization_scheme=scheme,
-            )
-
-    convert_to_onnx_and_check(fake_quantization, opset=10 if per_layer else 13)
-
-
-def generate_fake_quantization_test_moving_average(
-    scheme: str = "symmetric", device_type: str = "cpu",
-):
-    @flow.global_function()
-    def fake_quantization_moving_average():
-        with flow.scope.placement(device_type, "0:0"):
-            x = flow.get_variable(
-                name="x1",
-                shape=(2, 3, 4),
-                dtype=flow.float,
-                initializer=flow.random_uniform_initializer(-10, 10),
-            )
-            return flow.quantization.fake_quantization(
-                x,
-                *flow.quantization.moving_average_min_max_observer(
-                    x, quantization_scheme=scheme,
-                ),
-                quantization_scheme=scheme,
-            )
-
-    set_moving_max_min_value()
-
-    convert_to_onnx_and_check(
-        fake_quantization_moving_average, opset=10, explicit_init=False
-    )
-
-
-# min_max_observer
-def test_fake_quantization_symmetric():
-    generate_fake_quantization_test(per_layer=True, scheme="symmetric")
-
-
-def test_fake_quantization_symmetric_per_channel():
-    generate_fake_quantization_test(per_layer=False, scheme="symmetric")
-
-
-def test_fake_quantization_affine():
-    generate_fake_quantization_test(per_layer=True, scheme="affine")
-
-
-def test_fake_quantization_affine_per_channel():
-    generate_fake_quantization_test(per_layer=False, scheme="affine")
-
-
-def test_fake_quantization_symmetric_gpu():
-    generate_fake_quantization_test(
-        per_layer=True, scheme="symmetric", device_type="gpu"
-    )
-
-
-def test_fake_quantization_symmetric_per_channel_gpu():
-    generate_fake_quantization_test(
-        per_layer=False, scheme="symmetric", device_type="gpu"
-    )
-
-
-def test_fake_quantization_affine_gpu():
-    generate_fake_quantization_test(per_layer=True, scheme="affine", device_type="gpu")
-
-
-def test_fake_quantization_affine_per_channel_gpu():
-    generate_fake_quantization_test(per_layer=False, scheme="affine", device_type="gpu")
-
-
-# moving_average_min_max_observer
-def test_fake_quantization_symmetric_moving_average():
-    generate_fake_quantization_test_moving_average(scheme="symmetric")
-
-
-def test_fake_quantization_affine_moving_average():
-    generate_fake_quantization_test_moving_average(scheme="affine")
-
-
-def test_fake_quantization_symmetric_gpu_moving_average():
-    generate_fake_quantization_test_moving_average(
-        scheme="symmetric", device_type="gpu"
-    )
-
-
-def test_fake_quantization_affine_gpu_moving_average():
-    generate_fake_quantization_test_moving_average(scheme="affine", device_type="gpu")
diff --git a/examples/oneflow2onnx/nodes/test_flatten.py b/examples/oneflow2onnx/nodes/test_flatten.py
index 3357bd9b9b902a9a8458b40f2be2899e03b87d38..df327cc9a7177253a17e9b821ed759d0d0746908 100644
--- a/examples/oneflow2onnx/nodes/test_flatten.py
+++ b/examples/oneflow2onnx/nodes/test_flatten.py
@@ -13,36 +13,36 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
""" +import tempfile import oneflow as flow -import oneflow.typing as tp from oneflow_onnx.oneflow2onnx.util import convert_to_onnx_and_check +class Flatten(flow.nn.Module): + def __init__(self) -> None: + super(Flatten, self).__init__() + self.flatten = flow.nn.Flatten() -def test_flatten(): - @flow.global_function() - def flatten(x: tp.Numpy.Placeholder((3, 4, 2, 5))): - return flow.flatten(x, start_dim=1, end_dim=-1) - - convert_to_onnx_and_check(flatten) + def forward(self, x: flow.Tensor) -> flow.Tensor: + return self.flatten(x) -def test_flatten_aixs_negative(): - @flow.global_function() - def flatten(x: tp.Numpy.Placeholder((3, 4, 2, 5))): - return flow.flatten(x, start_dim=0, end_dim=-1) +flatten = Flatten() +class flattenOpGraph(flow.nn.Graph): + def __init__(self): + super().__init__() + self.m = flatten - convert_to_onnx_and_check(flatten) + def build(self, x): + out = self.m(x) + return out -def test_flatten_aixs_default(): - @flow.global_function() - def flatten(x: tp.Numpy.Placeholder((3, 4, 2, 5))): - return flow.flatten(x) - convert_to_onnx_and_check(flatten) +def test_flatten(): + + flatten_graph = flattenOpGraph() + flatten_graph._compile(flow.randn(1, 3, 224, 224)) -def test_flatten_dtype_int(): - @flow.global_function() - def flatten(x: tp.Numpy.Placeholder((3, 4, 2, 5))): - x = flow.cast(x, flow.int32) - return flow.flatten(x) + with tempfile.TemporaryDirectory() as tmpdirname: + flow.save(flatten.state_dict(), tmpdirname) + convert_to_onnx_and_check(flatten_graph, flow_weight_dir=tmpdirname, onnx_model_path="/tmp") - convert_to_onnx_and_check(flatten, opset=11) +test_flatten() diff --git a/examples/oneflow2onnx/nodes/test_gather_nd.py b/examples/oneflow2onnx/nodes/test_gather_nd.py deleted file mode 100644 index 7459a5a2390d27132ae9259a0aea9bcfeffad354..0000000000000000000000000000000000000000 --- a/examples/oneflow2onnx/nodes/test_gather_nd.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import oneflow as flow -import oneflow.typing as tp -from oneflow_onnx.oneflow2onnx.util import convert_to_onnx_and_check - - -def test_gather_nd(): - @flow.global_function() - def gather_nd(): - x = flow.get_variable( - name="x", - shape=(2, 3, 4), - dtype=flow.float, - initializer=flow.random_uniform_initializer(), - ) - y = flow.get_variable( - name="y", - shape=(2, 3), - dtype=flow.int64, - initializer=flow.random_uniform_initializer(0, 1, flow.int64), - ) - return flow.gather_nd(x, y) - - convert_to_onnx_and_check(gather_nd, opset=11) diff --git a/examples/oneflow2onnx/nodes/test_large_array.py b/examples/oneflow2onnx/nodes/test_large_array.py deleted file mode 100644 index 35e74f96c2b2fdd66c9de31f7e0c35da15a9a403..0000000000000000000000000000000000000000 --- a/examples/oneflow2onnx/nodes/test_large_array.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. 
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import oneflow as flow
-import oneflow.typing as tp
-from oneflow_onnx.oneflow2onnx.util import convert_to_onnx_and_check
-
-
-def test_large_array():
-    @flow.global_function()
-    def add_with_large_array():
-        large_shape = (256 * 1024 * 1024 + 1,)
-        x = flow.get_variable(
-            name="x",
-            shape=large_shape,
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        y = flow.get_variable(
-            name="y",
-            shape=large_shape,
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        return flow.math.add_n([x, y])
-
-    # ONNX Runtime optimizers doesn't support external data
-    convert_to_onnx_and_check(
-        add_with_large_array, external_data=True, ort_optimize=False
-    )
diff --git a/examples/oneflow2onnx/nodes/test_matmul.py b/examples/oneflow2onnx/nodes/test_matmul.py
index 9d9ab072f1da185e588bdd2e8e9ca7689b91c6bb..8a18f5e70734990b0f5d23c5cec7e09354489d12 100644
--- a/examples/oneflow2onnx/nodes/test_matmul.py
+++ b/examples/oneflow2onnx/nodes/test_matmul.py
@@ -13,105 +13,36 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """
+import tempfile
 import oneflow as flow
 from oneflow_onnx.oneflow2onnx.util import convert_to_onnx_and_check
+class MatMul(flow.nn.Module):
+    def __init__(self) -> None:
+        super(MatMul, self).__init__()
+        self.matmul = flow.nn.Linear(20, 30)
 
-def test_matmul():
-    @flow.global_function()
-    def matmul():
-        a = flow.get_variable(
-            name="a",
-            shape=(2, 3),
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        b = flow.get_variable(
-            name="b",
-            shape=(3, 4),
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        return flow.matmul(a, b)
-
-    convert_to_onnx_and_check(matmul)
-
-
-def test_matmul_ta():
-    @flow.global_function()
-    def matmul():
-        a = flow.get_variable(
-            name="a",
-            shape=(3, 2),
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        b = flow.get_variable(
-            name="b",
-            shape=(3, 4),
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        return flow.matmul(a, b, transpose_a=True)
-
-    convert_to_onnx_and_check(matmul)
+    def forward(self, x: flow.Tensor) -> flow.Tensor:
+        return self.matmul(x)
 
+matmul = MatMul()
+class matmulOpGraph(flow.nn.Graph):
+    def __init__(self):
+        super().__init__()
+        self.m = matmul
 
-def test_matmul_tb():
-    @flow.global_function()
-    def matmul():
-        a = flow.get_variable(
-            name="a",
-            shape=(2, 3),
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        b = flow.get_variable(
-            name="b",
-            shape=(4, 3),
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        return flow.matmul(a, b, transpose_b=True)
+    def build(self, x):
+        out = self.m(x)
+        return out
 
-    convert_to_onnx_and_check(matmul)
-
-
-def test_matmul_ta_tb():
-    @flow.global_function()
-    def matmul():
-        a = flow.get_variable(
-            name="a",
-            shape=(3, 2),
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        b = flow.get_variable(
-            name="b",
-            shape=(4, 3),
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        return flow.matmul(a, b, transpose_a=True, transpose_b=True)
-
-    convert_to_onnx_and_check(matmul)
+def test_matmul():
+
+    matmul_graph = matmulOpGraph()
+    matmul_graph._compile(flow.randn(1, 20))
 
-def test_batch_matmul():
-    @flow.global_function()
-    def matmul():
-        a = flow.get_variable(
-            name="a",
-            shape=(4, 2, 3),
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        b = flow.get_variable(
-            name="b",
-            shape=(4, 3, 4),
-            dtype=flow.float,
-            initializer=flow.random_uniform_initializer(),
-        )
-        return flow.matmul(a, b)
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        flow.save(matmul.state_dict(), tmpdirname)
+        convert_to_onnx_and_check(matmul_graph, flow_weight_dir=tmpdirname, onnx_model_path="/tmp")
 
-    convert_to_onnx_and_check(matmul)
+test_matmul()
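Editor's note (not part of the patch): since `onnxruntime>=1.6.0` stays in the requirements, a model exported the way the rewritten matmul test does can be sanity-checked independently of `convert_to_onnx_and_check`. A hedged sketch — the `model.onnx` file name under `/tmp` and the `(1, 20)` input shape are assumptions for illustration:

```python
import numpy as np
import onnxruntime as ort

# Assumed output location; convert_to_onnx_and_check above was pointed at /tmp.
sess = ort.InferenceSession("/tmp/model.onnx")
input_name = sess.get_inputs()[0].name

# Shape mirrors the dummy input the matmul graph was compiled with.
x = np.random.randn(1, 20).astype(np.float32)
outputs = sess.run(None, {input_name: x})
print(outputs[0].shape)  # expect (1, 30) for the Linear(20, 30) module
```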
-""" -import oneflow as flow -from oneflow_onnx.oneflow2onnx.util import convert_to_onnx_and_check - - -def generate_min_max_observer_test( - out_pos: int, - per_layer: bool, - formula: str, - scheme: str, - device_type: str = "cpu", - opset: int = 10, -): - @flow.global_function() - def min_max_observer(): - with flow.scope.placement(device_type, "0:0"): - x = flow.get_variable( - name="x1", - shape=(2, 3, 4), - dtype=flow.float, - initializer=flow.random_uniform_initializer(-10, 10), - ) - return flow.quantization.min_max_observer( - x, - per_layer_quantization=per_layer, - quantization_formula=formula, - quantization_scheme=scheme, - )[out_pos] - - convert_to_onnx_and_check(min_max_observer, opset=opset) - - -def test_min_max_observer_symmetric(): - generate_min_max_observer_test(0, True, "google", "symmetric") - - -def test_min_max_observer_symmetric_zero_point(): - generate_min_max_observer_test(1, True, "google", "symmetric") - - -def test_min_max_observer_affine(): - generate_min_max_observer_test(0, True, "google", "affine") - - -def test_min_max_observer_affine_zero_point(): - generate_min_max_observer_test(1, True, "google", "affine") - - -def test_min_max_observer_symmetric_per_channel(): - generate_min_max_observer_test(0, False, "google", "symmetric", opset=13) - - -def test_min_max_observer_symmetric_per_channel_zero_point(): - generate_min_max_observer_test(1, False, "google", "symmetric", opset=13) - - -def test_min_max_observer_affine_per_channel(): - generate_min_max_observer_test(0, False, "google", "affine", opset=13) - - -def test_min_max_observer_affine_per_channel_zero_point(): - generate_min_max_observer_test(1, False, "google", "affine", opset=13) - - -def test_min_max_observer_cambricon(): - generate_min_max_observer_test(0, True, "cambricon", "symmetric") - - -def test_min_max_observer_cambricon_zero_point(): - generate_min_max_observer_test(1, True, "cambricon", "symmetric") - - -def test_min_max_observer_symmetric_gpu(): - generate_min_max_observer_test(0, True, "google", "symmetric", device_type="gpu") - - -def test_min_max_observer_symmetric_zero_point_gpu(): - generate_min_max_observer_test(1, True, "google", "symmetric", device_type="gpu") - - -def test_min_max_observer_affine_gpu(): - generate_min_max_observer_test(0, True, "google", "affine", device_type="gpu") - - -def test_min_max_observer_affine_zero_point_gpu(): - generate_min_max_observer_test(1, True, "google", "affine", device_type="gpu") - - -def test_min_max_observer_symmetric_per_channel_gpu(): - generate_min_max_observer_test( - 0, False, "google", "symmetric", device_type="gpu", opset=13 - ) - - -def test_min_max_observer_symmetric_per_channel_zero_point_gpu(): - generate_min_max_observer_test( - 1, False, "google", "symmetric", device_type="gpu", opset=13 - ) - - -def test_min_max_observer_affine_per_channel_gpu(): - generate_min_max_observer_test( - 0, False, "google", "affine", device_type="gpu", opset=13 - ) - - -def test_min_max_observer_affine_per_channel_zero_point_gpu(): - generate_min_max_observer_test( - 1, False, "google", "affine", device_type="gpu", opset=13 - ) - - -def test_min_max_observer_cambricon_gpu(): - generate_min_max_observer_test(0, True, "cambricon", "symmetric", device_type="gpu") - - -def test_min_max_observer_cambricon_zero_point_gpu(): - generate_min_max_observer_test(1, True, "cambricon", "symmetric", device_type="gpu") diff --git a/examples/oneflow2onnx/nodes/test_moving_average_max_min_observer.py 
b/examples/oneflow2onnx/nodes/test_moving_average_max_min_observer.py deleted file mode 100644 index 84007a286f28f60b9a627f518ccfacd8648ccaa7..0000000000000000000000000000000000000000 --- a/examples/oneflow2onnx/nodes/test_moving_average_max_min_observer.py +++ /dev/null @@ -1,117 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import numpy as np -import oneflow as flow -from oneflow_onnx.oneflow2onnx.util import convert_to_onnx_and_check - - -def set_moving_max_min_value(): - max_key, min_key = "", "" - keys = flow.get_all_variables().keys() - for key in keys: - if max_key != "" and min_key != "": - break - if key[-3:] == "max": - max_key = key - if key[-3:] == "min": - min_key = key - flow.load_variables( - { - max_key: np.array([0.5]).astype(np.float32), - min_key: np.array([-0.2]).astype(np.float32), - } - ) - - -def generate_moving_average_min_max_observer_test( - out_pos: int, formula: str, scheme: str = "symmetric", device_type: str = "cpu", -): - flow.clear_default_session() - - @flow.global_function() - def moving_average_min_max_observer(): - with flow.scope.placement(device_type, "0:0"): - x = flow.get_variable( - name="x1", - shape=(2, 3, 4), - dtype=flow.float, - initializer=flow.random_uniform_initializer(-10, 10), - ) - return flow.quantization.moving_average_min_max_observer( - x, quantization_formula=formula, quantization_scheme=scheme - )[out_pos] - - set_moving_max_min_value() - - convert_to_onnx_and_check( - moving_average_min_max_observer, opset=10, explicit_init=False - ) - - -def test_moving_average_min_max_observer_symmetric(): - generate_moving_average_min_max_observer_test(0, "google", "symmetric") - - -def test_moving_average_min_max_observer_symmetric_zero_point(): - generate_moving_average_min_max_observer_test(1, "google", "symmetric") - - -def test_moving_average_min_max_observer_affine(): - generate_moving_average_min_max_observer_test(0, "google", "affine") - - -def test_moving_average_min_max_observer_affine_zero_point(): - generate_moving_average_min_max_observer_test(1, "google", "affine") - - -def test_moving_average_min_max_observer_cambricon(): - generate_moving_average_min_max_observer_test(0, "cambricon") - - -def test_moving_average_min_max_observer_cambricon_zero_point(): - generate_moving_average_min_max_observer_test(1, "cambricon") - - -def test_moving_average_min_max_observer_symmetric_gpu(): - generate_moving_average_min_max_observer_test( - 0, "google", "symmetric", device_type="gpu" - ) - - -def test_moving_average_min_max_observer_symmetric_zero_point_gpu(): - generate_moving_average_min_max_observer_test( - 1, "google", "symmetric", device_type="gpu" - ) - - -def test_moving_average_min_max_observer_affine_gpu(): - generate_moving_average_min_max_observer_test( - 0, "google", "affine", device_type="gpu" - ) - - -def test_moving_average_min_max_observer_affine_zero_point_gpu(): - generate_moving_average_min_max_observer_test( - 1, "google", "affine", device_type="gpu" - ) - - -def 
test_moving_average_min_max_observer_cambricon_gpu(): - generate_moving_average_min_max_observer_test(0, "cambricon", device_type="gpu") - - -def test_moving_average_min_max_observer_cambricon_zero_point_gpu(): - generate_moving_average_min_max_observer_test(1, "cambricon", device_type="gpu") diff --git a/examples/test_code_gen.sh b/examples/test_code_gen.sh deleted file mode 100755 index 8d22cf8fd67555dfc79c7ed2552a44467b7d33e9..0000000000000000000000000000000000000000 --- a/examples/test_code_gen.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -read_dir(){ - for file in `ls -a $1` - do - if [ -d $1"/"$file ] - then - if [[ $file != '.' && $file != '..' ]] - then - read_dir $1"/"$file - fi - else - check_suffix $1"/"$file - fi - done -} - -check_suffix() -{ - file=$1 - - if [ "${file##*.}"x = "py"x ];then - python3 -m pytest $file -v -s - python3 /tmp/oneflow_code.py - fi -} - -path="examples/x2oneflow/pytorch2oneflow/code_gen" -read_dir $path -path="examples/x2oneflow/tensorflow2oneflow/code_gen" -read_dir $path -path="examples/x2oneflow/paddle2oneflow/code_gen" -read_dir $path \ No newline at end of file diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_alexnet.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_alexnet.py deleted file mode 100644 index 27d7bf865074785d3c31b0adb3c12b267a1e7fd3..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_alexnet.py +++ /dev/null @@ -1,157 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -# https://github.com/PaddlePaddle/PaddleClas/blob/develop/ppcls/modeling/architectures/alexnet.py -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout, ReLU -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -_all__ = ["AlexNet"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvPoolLayer(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - filter_size, - stride, - padding, - stdv, - groups=1, - act=None, - name=None, - ): - super(ConvPoolLayer, self).__init__() - - self.relu = ReLU() if act == "relu" else None - - self._conv = Conv2D( - in_channels=input_channels, - out_channels=output_channels, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr( - name=name + "_weights", initializer=Uniform(-stdv, stdv) - ), - bias_attr=ParamAttr( - name=name + "_offset", initializer=Uniform(-stdv, stdv) - ), - ) - self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) - - def forward(self, inputs): - x = self._conv(inputs) - if self.relu is not None: - x = self.relu(x) - x = self._pool(x) - return x - - -class AlexNetDY(nn.Layer): - def __init__(self, class_dim=1000): - super(AlexNetDY, self).__init__() - - stdv = 1.0 / math.sqrt(3 * 11 * 11) - self._conv1 = ConvPoolLayer(3, 64, 11, 4, 2, stdv, act="relu", name="conv1") - stdv = 1.0 / math.sqrt(64 * 5 * 5) - self._conv2 = ConvPoolLayer(64, 192, 5, 1, 2, stdv, act="relu", name="conv2") - stdv = 1.0 / math.sqrt(192 * 3 * 3) - self._conv3 = Conv2D( - 192, - 384, - 3, - stride=1, - padding=1, - weight_attr=ParamAttr( - name="conv3_weights", initializer=Uniform(-stdv, stdv) - ), - bias_attr=ParamAttr(name="conv3_offset", initializer=Uniform(-stdv, stdv)), - ) - stdv = 1.0 / math.sqrt(384 * 3 * 3) - self._conv4 = Conv2D( - 384, - 256, - 3, - stride=1, - padding=1, - weight_attr=ParamAttr( - name="conv4_weights", initializer=Uniform(-stdv, stdv) - ), - bias_attr=ParamAttr(name="conv4_offset", initializer=Uniform(-stdv, stdv)), - ) - stdv = 1.0 / math.sqrt(256 * 3 * 3) - self._conv5 = ConvPoolLayer(256, 256, 3, 1, 1, stdv, act="relu", name="conv5") - stdv = 1.0 / math.sqrt(256 * 6 * 6) - - self._drop1 = Dropout(p=0.5, mode="downscale_in_infer") - self._fc6 = Linear( - in_features=256 * 6 * 6, - out_features=4096, - weight_attr=ParamAttr(name="fc6_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc6_offset", initializer=Uniform(-stdv, stdv)), - ) - - self._drop2 = Dropout(p=0.5, mode="downscale_in_infer") - self._fc7 = Linear( - in_features=4096, - out_features=4096, - weight_attr=ParamAttr(name="fc7_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc7_offset", initializer=Uniform(-stdv, stdv)), - ) - self._fc8 = Linear( - in_features=4096, - out_features=class_dim, - weight_attr=ParamAttr(name="fc8_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc8_offset", initializer=Uniform(-stdv, stdv)), - ) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._conv2(x) - x = self._conv3(x) - x = F.relu(x) - x = self._conv4(x) - x = F.relu(x) - x = self._conv5(x) - x = paddle.flatten(x, start_axis=1, stop_axis=-1) - x = self._drop1(x) - x = self._fc6(x) - x = F.relu(x) - x = self._drop2(x) - x = self._fc7(x) - x = F.relu(x) - x = self._fc8(x) - return x - - -def AlexNet(**args): - model = 
AlexNetDY(**args) - return model - - -def test_alexnet(): - load_paddle_module_and_check( - AlexNet, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_darknet.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_darknet.py deleted file mode 100644 index d1eba2639e5990e36502072588a601e1b730b13e..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_darknet.py +++ /dev/null @@ -1,180 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -# https://github.com/PaddlePaddle/PaddleClas/blob/develop/ppcls/modeling/architectures/darknet.py -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = ["DarkNet53"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, input_channels, output_channels, filter_size, stride, padding, name=None - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=input_channels, - out_channels=output_channels, - kernel_size=filter_size, - stride=stride, - padding=padding, - weight_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False, - ) - - bn_name = name + ".bn" - self._bn = BatchNorm( - num_channels=output_channels, - act="relu", - param_attr=ParamAttr(name=bn_name + ".scale"), - bias_attr=ParamAttr(name=bn_name + ".offset"), - moving_mean_name=bn_name + ".mean", - moving_variance_name=bn_name + ".var", - ) - - def forward(self, inputs): - x = self._conv(inputs) - x = self._bn(x) - return x - - -class BasicBlock(nn.Layer): - def __init__(self, input_channels, output_channels, name=None): - super(BasicBlock, self).__init__() - - self._conv1 = ConvBNLayer( - input_channels, output_channels, 1, 1, 0, name=name + ".0" - ) - self._conv2 = ConvBNLayer( - output_channels, output_channels * 2, 3, 1, 1, name=name + ".1" - ) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._conv2(x) - return paddle.add(x=inputs, y=x) - - -class DarkNet(nn.Layer): - def __init__(self, class_dim=1000): - super(DarkNet, self).__init__() - - self.stages = [1, 2, 8, 8, 4] - self._conv1 = ConvBNLayer(3, 32, 3, 1, 1, name="yolo_input") - self._conv2 = ConvBNLayer(32, 64, 3, 2, 1, name="yolo_input.downsample") - - self._basic_block_01 = BasicBlock(64, 32, name="stage.0.0") - self._downsample_0 = ConvBNLayer(64, 128, 3, 2, 1, name="stage.0.downsample") - - self._basic_block_11 = BasicBlock(128, 64, name="stage.1.0") - self._basic_block_12 = BasicBlock(128, 64, name="stage.1.1") - self._downsample_1 = ConvBNLayer(128, 256, 3, 2, 1, name="stage.1.downsample") - - self._basic_block_21 = BasicBlock(256, 128, name="stage.2.0") - 
self._basic_block_22 = BasicBlock(256, 128, name="stage.2.1") - self._basic_block_23 = BasicBlock(256, 128, name="stage.2.2") - self._basic_block_24 = BasicBlock(256, 128, name="stage.2.3") - self._basic_block_25 = BasicBlock(256, 128, name="stage.2.4") - self._basic_block_26 = BasicBlock(256, 128, name="stage.2.5") - self._basic_block_27 = BasicBlock(256, 128, name="stage.2.6") - self._basic_block_28 = BasicBlock(256, 128, name="stage.2.7") - self._downsample_2 = ConvBNLayer(256, 512, 3, 2, 1, name="stage.2.downsample") - - self._basic_block_31 = BasicBlock(512, 256, name="stage.3.0") - self._basic_block_32 = BasicBlock(512, 256, name="stage.3.1") - self._basic_block_33 = BasicBlock(512, 256, name="stage.3.2") - self._basic_block_34 = BasicBlock(512, 256, name="stage.3.3") - self._basic_block_35 = BasicBlock(512, 256, name="stage.3.4") - self._basic_block_36 = BasicBlock(512, 256, name="stage.3.5") - self._basic_block_37 = BasicBlock(512, 256, name="stage.3.6") - self._basic_block_38 = BasicBlock(512, 256, name="stage.3.7") - self._downsample_3 = ConvBNLayer(512, 1024, 3, 2, 1, name="stage.3.downsample") - - self._basic_block_41 = BasicBlock(1024, 512, name="stage.4.0") - self._basic_block_42 = BasicBlock(1024, 512, name="stage.4.1") - self._basic_block_43 = BasicBlock(1024, 512, name="stage.4.2") - self._basic_block_44 = BasicBlock(1024, 512, name="stage.4.3") - - self._pool = AdaptiveAvgPool2D(1) - - stdv = 1.0 / math.sqrt(1024.0) - self._out = Linear( - 1024, - class_dim, - weight_attr=ParamAttr(name="fc_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._conv2(x) - - x = self._basic_block_01(x) - x = self._downsample_0(x) - - x = self._basic_block_11(x) - x = self._basic_block_12(x) - x = self._downsample_1(x) - - x = self._basic_block_21(x) - x = self._basic_block_22(x) - x = self._basic_block_23(x) - x = self._basic_block_24(x) - x = self._basic_block_25(x) - x = self._basic_block_26(x) - x = self._basic_block_27(x) - x = self._basic_block_28(x) - x = self._downsample_2(x) - - x = self._basic_block_31(x) - x = self._basic_block_32(x) - x = self._basic_block_33(x) - x = self._basic_block_34(x) - x = self._basic_block_35(x) - x = self._basic_block_36(x) - x = self._basic_block_37(x) - x = self._basic_block_38(x) - x = self._downsample_3(x) - - x = self._basic_block_41(x) - x = self._basic_block_42(x) - x = self._basic_block_43(x) - x = self._basic_block_44(x) - - x = self._pool(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._out(x) - return x - - -def DarkNet53(**args): - model = DarkNet(**args) - return model - - -def test_darknet(): - load_paddle_module_and_check( - DarkNet53, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_densenet.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_densenet.py deleted file mode 100644 index b4ecf162f16623fa53788abf494fd5ac9a5b5460..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_densenet.py +++ /dev/null @@ -1,332 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = ["DenseNet121", "DenseNet161", "DenseNet169", "DenseNet201", "DenseNet264"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class BNACConvLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - pad=0, - groups=1, - act="relu", - name=None, - ): - super(BNACConvLayer, self).__init__() - - self._batch_norm = BatchNorm( - num_channels, - act=act, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=pad, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - - def forward(self, input): - y = self._batch_norm(input) - y = self._conv(y) - return y - - -class DenseLayer(nn.Layer): - def __init__(self, num_channels, growth_rate, bn_size, dropout, name=None): - super(DenseLayer, self).__init__() - self.dropout = dropout - - self.bn_ac_func1 = BNACConvLayer( - num_channels=num_channels, - num_filters=bn_size * growth_rate, - filter_size=1, - pad=0, - stride=1, - name=name + "_x1", - ) - - self.bn_ac_func2 = BNACConvLayer( - num_channels=bn_size * growth_rate, - num_filters=growth_rate, - filter_size=3, - pad=1, - stride=1, - name=name + "_x2", - ) - - if dropout: - self.dropout_func = Dropout(p=dropout, mode="downscale_in_infer") - - def forward(self, input): - conv = self.bn_ac_func1(input) - conv = self.bn_ac_func2(conv) - if self.dropout: - conv = self.dropout_func(conv) - conv = paddle.concat([input, conv], axis=1) - return conv - - -class DenseBlock(nn.Layer): - def __init__( - self, num_channels, num_layers, bn_size, growth_rate, dropout, name=None - ): - super(DenseBlock, self).__init__() - self.dropout = dropout - - self.dense_layer_func = [] - - pre_channel = num_channels - for layer in range(num_layers): - self.dense_layer_func.append( - self.add_sublayer( - "{}_{}".format(name, layer + 1), - DenseLayer( - num_channels=pre_channel, - growth_rate=growth_rate, - bn_size=bn_size, - dropout=dropout, - name=name + "_" + str(layer + 1), - ), - ) - ) - pre_channel = pre_channel + growth_rate - - def forward(self, input): - conv = input - for func in self.dense_layer_func: - conv = func(conv) - return conv - - -class TransitionLayer(nn.Layer): - def __init__(self, num_channels, num_output_features, name=None): - super(TransitionLayer, self).__init__() - - self.conv_ac_func = BNACConvLayer( - num_channels=num_channels, - num_filters=num_output_features, - filter_size=1, - pad=0, - stride=1, - name=name, - ) - - self.pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0) - - def forward(self, input): - y = self.conv_ac_func(input) - y = self.pool2d_avg(y) 
- return y - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - pad=0, - groups=1, - act="relu", - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=pad, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, input): - y = self._conv(input) - y = self._batch_norm(y) - return y - - -class DenseNet(nn.Layer): - def __init__(self, layers=60, bn_size=4, dropout=0, class_dim=1000): - super(DenseNet, self).__init__() - - supported_layers = [121, 161, 169, 201, 264] - assert ( - layers in supported_layers - ), "supported layers are {} but input layer is {}".format( - supported_layers, layers - ) - densenet_spec = { - 121: (64, 32, [6, 12, 24, 16]), - 161: (96, 48, [6, 12, 36, 24]), - 169: (64, 32, [6, 12, 32, 32]), - 201: (64, 32, [6, 12, 48, 32]), - 264: (64, 32, [6, 12, 64, 48]), - } - num_init_features, growth_rate, block_config = densenet_spec[layers] - - self.conv1_func = ConvBNLayer( - num_channels=3, - num_filters=num_init_features, - filter_size=7, - stride=2, - pad=3, - act="relu", - name="conv1", - ) - - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.block_config = block_config - - self.dense_block_func_list = [] - self.transition_func_list = [] - pre_num_channels = num_init_features - num_features = num_init_features - for i, num_layers in enumerate(block_config): - self.dense_block_func_list.append( - self.add_sublayer( - "db_conv_{}".format(i + 2), - DenseBlock( - num_channels=pre_num_channels, - num_layers=num_layers, - bn_size=bn_size, - growth_rate=growth_rate, - dropout=dropout, - name="conv" + str(i + 2), - ), - ) - ) - - num_features = num_features + num_layers * growth_rate - pre_num_channels = num_features - - if i != len(block_config) - 1: - self.transition_func_list.append( - self.add_sublayer( - "tr_conv{}_blk".format(i + 2), - TransitionLayer( - num_channels=pre_num_channels, - num_output_features=num_features // 2, - name="conv" + str(i + 2) + "_blk", - ), - ) - ) - pre_num_channels = num_features // 2 - num_features = num_features // 2 - - self.batch_norm = BatchNorm( - num_features, - act="relu", - param_attr=ParamAttr(name="conv5_blk_bn_scale"), - bias_attr=ParamAttr(name="conv5_blk_bn_offset"), - moving_mean_name="conv5_blk_bn_mean", - moving_variance_name="conv5_blk_bn_variance", - ) - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - stdv = 1.0 / math.sqrt(num_features * 1.0) - - self.out = Linear( - num_features, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, input): - conv = self.conv1_func(input) - conv = self.pool2d_max(conv) - - for i, num_layers in enumerate(self.block_config): - conv = self.dense_block_func_list[i](conv) - if i != len(self.block_config) - 1: - conv = self.transition_func_list[i](conv) - - conv = self.batch_norm(conv) - y = self.pool2d_avg(conv) - y = paddle.flatten(y, start_axis=1, stop_axis=-1) - # y = paddle.reshape(y, [y.shape[0], -1]) - y = self.out(y) - return y - - -def DenseNet121(**args): - model = 
DenseNet(layers=121, **args) - return model - - -def DenseNet161(**args): - model = DenseNet(layers=161, **args) - return model - - -def DenseNet169(**args): - model = DenseNet(layers=169, **args) - return model - - -def DenseNet201(**args): - model = DenseNet(layers=201, **args) - return model - - -def DenseNet264(**args): - model = DenseNet(layers=264, **args) - return model - - -def test_densenet121(): - load_paddle_module_and_check( - DenseNet121, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_dpn.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_dpn.py deleted file mode 100644 index 8c46414f68912229450143ca8be1248b13bb131b..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_dpn.py +++ /dev/null @@ -1,452 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import numpy as np -import sys -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2D, BatchNorm, Linear -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = [ - "DPN", - "DPN68", - "DPN92", - "DPN98", - "DPN107", - "DPN131", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - pad=0, - groups=1, - act="relu", - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=pad, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, input): - y = self._conv(input) - y = self._batch_norm(y) - return y - - -class BNACConvLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - pad=0, - groups=1, - act="relu", - name=None, - ): - super(BNACConvLayer, self).__init__() - self.num_channels = num_channels - - self._batch_norm = BatchNorm( - num_channels, - act=act, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=pad, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - - def forward(self, input): - y = self._batch_norm(input) - y = self._conv(y) - return y - - -class DualPathFactory(nn.Layer): - def __init__( - 
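-        # Dual path block: a ResNet-style residual path (elementwise sum) plus
-        # a DenseNet-style dense path (channel concat); see forward() below.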
self, - num_channels, - num_1x1_a, - num_3x3_b, - num_1x1_c, - inc, - G, - _type="normal", - name=None, - ): - super(DualPathFactory, self).__init__() - - self.num_1x1_c = num_1x1_c - self.inc = inc - self.name = name - - kw = 3 - kh = 3 - pw = (kw - 1) // 2 - ph = (kh - 1) // 2 - - # type - if _type == "proj": - key_stride = 1 - self.has_proj = True - elif _type == "down": - key_stride = 2 - self.has_proj = True - elif _type == "normal": - key_stride = 1 - self.has_proj = False - else: - print("not implemented now!!!") - sys.exit(1) - - data_in_ch = ( - sum(num_channels) if isinstance(num_channels, list) else num_channels - ) - - if self.has_proj: - self.c1x1_w_func = BNACConvLayer( - num_channels=data_in_ch, - num_filters=num_1x1_c + 2 * inc, - filter_size=(1, 1), - pad=(0, 0), - stride=(key_stride, key_stride), - name=name + "_match", - ) - - self.c1x1_a_func = BNACConvLayer( - num_channels=data_in_ch, - num_filters=num_1x1_a, - filter_size=(1, 1), - pad=(0, 0), - name=name + "_conv1", - ) - - self.c3x3_b_func = BNACConvLayer( - num_channels=num_1x1_a, - num_filters=num_3x3_b, - filter_size=(kw, kh), - pad=(pw, ph), - stride=(key_stride, key_stride), - groups=G, - name=name + "_conv2", - ) - - self.c1x1_c_func = BNACConvLayer( - num_channels=num_3x3_b, - num_filters=num_1x1_c + inc, - filter_size=(1, 1), - pad=(0, 0), - name=name + "_conv3", - ) - - def forward(self, input): - # PROJ - if isinstance(input, list): - data_in = paddle.concat([input[0], input[1]], axis=1) - else: - data_in = input - - if self.has_proj: - c1x1_w = self.c1x1_w_func(data_in) - data_o1, data_o2 = paddle.split( - c1x1_w, num_or_sections=[self.num_1x1_c, 2 * self.inc], axis=1 - ) - else: - data_o1 = input[0] - data_o2 = input[1] - - c1x1_a = self.c1x1_a_func(data_in) - c3x3_b = self.c3x3_b_func(c1x1_a) - c1x1_c = self.c1x1_c_func(c3x3_b) - - c1x1_c1, c1x1_c2 = paddle.split( - c1x1_c, num_or_sections=[self.num_1x1_c, self.inc], axis=1 - ) - - # OUTPUTS - summ = paddle.add(x=data_o1, y=c1x1_c1) - dense = paddle.concat([data_o2, c1x1_c2], axis=1) - # tensor, channels - return [summ, dense] - - -class DPN(nn.Layer): - def __init__(self, layers=68, class_dim=1000): - super(DPN, self).__init__() - - self._class_dim = class_dim - - args = self.get_net_args(layers) - bws = args["bw"] - inc_sec = args["inc_sec"] - rs = args["r"] - k_r = args["k_r"] - k_sec = args["k_sec"] - G = args["G"] - init_num_filter = args["init_num_filter"] - init_filter_size = args["init_filter_size"] - init_padding = args["init_padding"] - - self.k_sec = k_sec - - self.conv1_x_1_func = ConvBNLayer( - num_channels=3, - num_filters=init_num_filter, - filter_size=init_filter_size, - stride=2, - pad=init_padding, - act="relu", - name="conv1", - ) - - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) - - num_channel_dpn = init_num_filter - - self.dpn_func_list = [] - # conv2 - conv5 - match_list, num = [], 0 - for gc in range(4): - bw = bws[gc] - inc = inc_sec[gc] - R = (k_r * bw) // rs[gc] - if gc == 0: - _type1 = "proj" - _type2 = "normal" - match = 1 - else: - _type1 = "down" - _type2 = "normal" - match = match + k_sec[gc - 1] - match_list.append(match) - self.dpn_func_list.append( - self.add_sublayer( - "dpn{}".format(match), - DualPathFactory( - num_channels=num_channel_dpn, - num_1x1_a=R, - num_3x3_b=R, - num_1x1_c=bw, - inc=inc, - G=G, - _type=_type1, - name="dpn" + str(match), - ), - ) - ) - num_channel_dpn = [bw, 3 * inc] - - for i_ly in range(2, k_sec[gc] + 1): - num += 1 - if num in match_list: - num += 1 - 
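-                # skip indices already taken by the stage-head 'proj'/'down'
-                # blocks so the generated sublayer names "dpn<N>" stay unique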
self.dpn_func_list.append( - self.add_sublayer( - "dpn{}".format(num), - DualPathFactory( - num_channels=num_channel_dpn, - num_1x1_a=R, - num_3x3_b=R, - num_1x1_c=bw, - inc=inc, - G=G, - _type=_type2, - name="dpn" + str(num), - ), - ) - ) - - num_channel_dpn = [num_channel_dpn[0], num_channel_dpn[1] + inc] - - out_channel = sum(num_channel_dpn) - - self.conv5_x_x_bn = BatchNorm( - num_channels=sum(num_channel_dpn), - act="relu", - param_attr=ParamAttr(name="final_concat_bn_scale"), - bias_attr=ParamAttr("final_concat_bn_offset"), - moving_mean_name="final_concat_bn_mean", - moving_variance_name="final_concat_bn_variance", - ) - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - stdv = 0.01 - - self.out = Linear( - out_channel, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, input): - conv1_x_1 = self.conv1_x_1_func(input) - convX_x_x = self.pool2d_max(conv1_x_1) - - dpn_idx = 0 - for gc in range(4): - convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x) - dpn_idx += 1 - for i_ly in range(2, self.k_sec[gc] + 1): - convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x) - dpn_idx += 1 - - conv5_x_x = paddle.concat(convX_x_x, axis=1) - conv5_x_x = self.conv5_x_x_bn(conv5_x_x) - - y = self.pool2d_avg(conv5_x_x) - y = paddle.flatten(y, start_axis=1, stop_axis=-1) - y = self.out(y) - return y - - def get_net_args(self, layers): - if layers == 68: - k_r = 128 - G = 32 - k_sec = [3, 4, 12, 3] - inc_sec = [16, 32, 32, 64] - bw = [64, 128, 256, 512] - r = [64, 64, 64, 64] - init_num_filter = 10 - init_filter_size = 3 - init_padding = 1 - elif layers == 92: - k_r = 96 - G = 32 - k_sec = [3, 4, 20, 3] - inc_sec = [16, 32, 24, 128] - bw = [256, 512, 1024, 2048] - r = [256, 256, 256, 256] - init_num_filter = 64 - init_filter_size = 7 - init_padding = 3 - elif layers == 98: - k_r = 160 - G = 40 - k_sec = [3, 6, 20, 3] - inc_sec = [16, 32, 32, 128] - bw = [256, 512, 1024, 2048] - r = [256, 256, 256, 256] - init_num_filter = 96 - init_filter_size = 7 - init_padding = 3 - elif layers == 107: - k_r = 200 - G = 50 - k_sec = [4, 8, 20, 3] - inc_sec = [20, 64, 64, 128] - bw = [256, 512, 1024, 2048] - r = [256, 256, 256, 256] - init_num_filter = 128 - init_filter_size = 7 - init_padding = 3 - elif layers == 131: - k_r = 160 - G = 40 - k_sec = [4, 8, 28, 3] - inc_sec = [16, 32, 32, 128] - bw = [256, 512, 1024, 2048] - r = [256, 256, 256, 256] - init_num_filter = 128 - init_filter_size = 7 - init_padding = 3 - else: - raise NotImplementedError - net_arg = { - "k_r": k_r, - "G": G, - "k_sec": k_sec, - "inc_sec": inc_sec, - "bw": bw, - "r": r, - } - net_arg["init_num_filter"] = init_num_filter - net_arg["init_filter_size"] = init_filter_size - net_arg["init_padding"] = init_padding - - return net_arg - - -def DPN68(**args): - model = DPN(layers=68, **args) - return model - - -def DPN92(**args): - model = DPN(layers=92, **args) - return model - - -def DPN98(**args): - model = DPN(layers=98, **args) - return model - - -def DPN107(**args): - model = DPN(layers=107, **args) - return model - - -def DPN131(**args): - model = DPN(layers=131, **args) - return model - - -def test_dpn68(): - load_paddle_module_and_check( - DPN68, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_efficientnet.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_efficientnet.py deleted file mode 100644 index 
ec1099ae4b858e6ebe8e0e47894b094bccf8524f..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_efficientnet.py +++ /dev/null @@ -1,975 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -import math -import collections -import re -import copy - -__all__ = [ - "EfficientNet", - "EfficientNetB0_small", - "EfficientNetB0", - "EfficientNetB1", - "EfficientNetB2", - "EfficientNetB3", - "EfficientNetB4", - "EfficientNetB5", - "EfficientNetB6", - "EfficientNetB7", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -GlobalParams = collections.namedtuple( - "GlobalParams", - [ - "batch_norm_momentum", - "batch_norm_epsilon", - "dropout_rate", - "num_classes", - "width_coefficient", - "depth_coefficient", - "depth_divisor", - "min_depth", - "drop_connect_rate", - ], -) - -BlockArgs = collections.namedtuple( - "BlockArgs", - [ - "kernel_size", - "num_repeat", - "input_filters", - "output_filters", - "expand_ratio", - "id_skip", - "stride", - "se_ratio", - ], -) - -GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) - - -def efficientnet_params(model_name): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - "efficientnet-b0": (1.0, 1.0, 224, 0.2), - "efficientnet-b1": (1.0, 1.1, 240, 0.2), - "efficientnet-b2": (1.1, 1.2, 260, 0.3), - "efficientnet-b3": (1.2, 1.4, 300, 0.3), - "efficientnet-b4": (1.4, 1.8, 380, 0.4), - "efficientnet-b5": (1.6, 2.2, 456, 0.4), - "efficientnet-b6": (1.8, 2.6, 528, 0.5), - "efficientnet-b7": (2.0, 3.1, 600, 0.5), - } - return params_dict[model_name] - - -def efficientnet( - width_coefficient=None, - depth_coefficient=None, - dropout_rate=0.2, - drop_connect_rate=0.2, -): - """ Get block arguments according to parameter and coefficients. 
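-
-    For example, per efficientnet_params above, EfficientNet-B1 corresponds to
-    width_coefficient=1.0, depth_coefficient=1.1, dropout_rate=0.2.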
""" - blocks_args = [ - "r1_k3_s11_e1_i32_o16_se0.25", - "r2_k3_s22_e6_i16_o24_se0.25", - "r2_k5_s22_e6_i24_o40_se0.25", - "r3_k3_s22_e6_i40_o80_se0.25", - "r3_k5_s11_e6_i80_o112_se0.25", - "r4_k5_s22_e6_i112_o192_se0.25", - "r1_k3_s11_e6_i192_o320_se0.25", - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None, - ) - - return blocks_args, global_params - - -def get_model_params(model_name, override_params): - """ Get the block args and global params for a given model """ - if model_name.startswith("efficientnet"): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet( - width_coefficient=w, depth_coefficient=d, dropout_rate=p - ) - else: - raise NotImplementedError("model name is not pre-defined: %s" % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters, global_params): - """ Calculate and round number of filters based on depth multiplier. """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats, global_params): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ - - @staticmethod - def _decode_block_string(block_string): - """ Gets a block through a string notation of arguments. """ - assert isinstance(block_string, str) - - ops = block_string.split("_") - options = {} - for op in ops: - splits = re.split(r"(\d.*)", op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - cond_1 = "s" in options and len(options["s"]) == 1 - cond_2 = (len(options["s"]) == 2) and (options["s"][0] == options["s"][1]) - assert cond_1 or cond_2 - - return BlockArgs( - kernel_size=int(options["k"]), - num_repeat=int(options["r"]), - input_filters=int(options["i"]), - output_filters=int(options["o"]), - expand_ratio=int(options["e"]), - id_skip=("noskip" not in block_string), - se_ratio=float(options["se"]) if "se" in options else None, - stride=[int(options["s"][0])], - ) - - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - "r%d" % block.num_repeat, - "k%d" % block.kernel_size, - "s%d%d" % (block.strides[0], block.strides[1]), - "e%s" % block.expand_ratio, - "i%d" % block.input_filters, - "o%d" % block.output_filters, - ] - if 0 < block.se_ratio <= 1: - args.append("se%s" % block.se_ratio) - if block.id_skip is False: - args.append("noskip") - return "_".join(args) - - @staticmethod - def decode(string_list): - """ - Decode a list of string notations to specify blocks in the network. 
- string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args): - """ - Encodes a list of BlockArgs to a list of strings. - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name, use_bias=False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name="batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name="fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size, stride, filter_size, dilation=1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = { - "b0_small": [224, 112, 112, 56, 28, 14, 14, 7], - "b0": [224, 112, 112, 56, 28, 14, 14, 7], - "b1": [240, 120, 120, 60, 30, 15, 15, 8], - "b2": [260, 130, 130, 65, 33, 17, 17, 9], - "b3": [300, 150, 150, 75, 38, 19, 19, 10], - "b4": [380, 190, 190, 95, 48, 24, 24, 12], - "b5": [456, 228, 228, 114, 57, 29, 29, 15], - "b6": [528, 264, 264, 132, 66, 33, 33, 17], - "b7": [600, 300, 300, 150, 75, 38, 38, 19], -} - - -def _drop_connect(inputs, prob, is_test): - if is_test: - output = inputs - else: - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = paddle.multiply(inputs, binary_tensor) / keep_prob - return output - - -class Conv2ds(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - filter_size, - stride=1, - padding=0, - groups=None, - name="conv2d", - act=None, - use_bias=False, - padding_type=None, - model_name=None, - cur_stage=None, - ): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = ( - 1 - if model_name == None and cur_stage == None - else inp_shape[model_name][cur_stage] - ) - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, 
width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2D( - input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - # act=act, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr, - ) - - def forward(self, inputs): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - input_channels, - filter_size, - output_channels, - stride=1, - num_groups=1, - padding_type="SAME", - conv_act=None, - bn_act="swish", - use_bn=True, - use_bias=False, - name=None, - conv_name=None, - bn_name=None, - model_name=None, - cur_stage=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds( - input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage, - ) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm( - num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr, - ) - - def forward(self, inputs): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - def __init__( - self, - input_channels, - block_args, - padding_type, - name=None, - model_name=None, - cur_stage=None, - ): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer( - input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage, - ) - - def forward(self, inputs): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - def __init__( - self, - input_channels, - block_args, - padding_type, - name=None, - model_name=None, - cur_stage=None, - ): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer( - input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage, - ) - - def forward(self, inputs): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - def __init__( - self, - input_channels, - block_args, - padding_type, - name=None, - model_name=None, - cur_stage=None, - ): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer( - input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + 
"_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage, - ) - - def forward(self, inputs): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - def __init__( - self, - input_channels, - num_squeezed_channels, - oup, - padding_type, - name=None, - model_name=None, - cur_stage=None, - ): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2D(1) - self._conv1 = Conv2ds( - input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce", - ) - - self._conv2 = Conv2ds( - num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand", - ) - - def forward(self, inputs): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - out = paddle.multiply(inputs, x) - return out - - -class MbConvBlock(nn.Layer): - def __init__( - self, - input_channels, - block_args, - padding_type, - use_se, - name=None, - drop_connect_rate=None, - model_name=None, - cur_stage=None, - ): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = ( - use_se - and (block_args.se_ratio is not None) - and (0 < block_args.se_ratio <= 1) - ) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm( - input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage, - ) - - self._dcn = DepthwiseConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage, - ) - - if self.has_se: - num_squeezed_channels = max( - 1, int(block_args.input_filters * block_args.se_ratio) - ) - self._se = SEBlock( - input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage, - ) - - self._pcn = ProjectConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage, - ) - - def forward(self, inputs): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - - if ( - self.id_skip - and self.block_args.stride == 1 - and self.block_args.input_filters == self.block_args.output_filters - ): - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, not self.training) - x = paddle.add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - def __init__( - self, - input_channels, - padding_type, - _global_params, - name=None, - model_name=None, - cur_stage=None, - ): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer( - input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage, - ) - - def forward(self, inputs): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - def __init__( - self, - input_channels, - _block_args, - _global_params, - padding_type, - use_se, - model_name=None, - ): - super(ExtractFeatures, self).__init__() - - 
self._global_params = _global_params - - self._conv_stem = ConvStemNorm( - input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0, - ) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace( - input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params), - ) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params), - ) - - drop_connect_rate = self._global_params.drop_connect_rate - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage, - ), - ) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace( - input_filters=block_args.output_filters, stride=1 - ) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage, - ), - ) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -class EfficientNet(nn.Layer): - def __init__( - self, - name="b0", - padding_type="SAME", - override_params=None, - use_se=True, - class_dim=1000, - ): - super(EfficientNet, self).__init__() - - model_name = "efficientnet-" + name - self.name = name - self._block_args, self._global_params = get_model_params( - model_name, override_params - ) - self.padding_type = padding_type - self.use_se = use_se - - self._ef = ExtractFeatures( - 3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - model_name=self.name, - ) - - output_channels = round_filters(1280, self._global_params) - if name == "b0_small" or name == "b0" or name == "b1": - oup = 320 - elif name == "b2": - oup = 352 - elif name == "b3": - oup = 384 - elif name == "b4": - oup = 448 - elif name == "b5": - oup = 512 - elif name == "b6": - oup = 576 - elif name == "b7": - oup = 640 - self._conv = ConvBNLayer( - oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7, - ) - self._pool = AdaptiveAvgPool2D(1) - - if self._global_params.dropout_rate: - self._drop = Dropout( - p=self._global_params.dropout_rate, mode="upscale_in_train" - ) - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear( - output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr - ) - - def forward(self, inputs): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x - - -def EfficientNetB0_small( - padding_type="DYNAMIC", override_params=None, use_se=False, **args -): - model = EfficientNet( - name="b0", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB0(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b0", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB1(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b1", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB2(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b2", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB3(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b3", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB4(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b4", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB5(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b5", - padding_type=padding_type, - 
override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB6(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b6", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB7(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b7", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def test_EfficientNetB0_small(): - load_paddle_module_and_check( - EfficientNetB0_small, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_ghostnet.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_ghostnet.py deleted file mode 100644 index 78bbed08d55edb3693aec6cf09eccf60a0fb9177..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_ghostnet.py +++ /dev/null @@ -1,361 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import math -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear -from paddle.regularizer import L2Decay -from paddle.nn.initializer import Uniform, KaimingNormal - -__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - act="relu", - name=None, - ): - super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(initializer=KaimingNormal(), name=name + "_weights"), - bias_attr=False, - ) - bn_name = name + "_bn" - - self._batch_norm = BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale", regularizer=L2Decay(0.0)), - bias_attr=ParamAttr(name=bn_name + "_offset", regularizer=L2Decay(0.0)), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class SEBlock(nn.Layer): - def __init__(self, num_channels, reduction_ratio=4, name=None): - super(SEBlock, self).__init__() - self.pool2d_gap = AdaptiveAvgPool2D(1) - self._num_channels = num_channels - stdv = 1.0 / math.sqrt(num_channels * 1.0) - med_ch = num_channels // reduction_ratio - self.squeeze = Linear( - num_channels, - med_ch, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_1_weights" - ), - bias_attr=ParamAttr(name=name + "_1_offset"), - ) - stdv = 1.0 / math.sqrt(med_ch * 1.0) - 
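-        # excitation FC restores the channel count; forward() clips its output
-        # to [0, 1] (instead of a sigmoid) and rescales the input feature map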
self.excitation = Linear( - med_ch, - num_channels, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_2_weights" - ), - bias_attr=ParamAttr(name=name + "_2_offset"), - ) - - def forward(self, inputs): - pool = self.pool2d_gap(inputs) - pool = paddle.squeeze(pool, axis=[2, 3]) - squeeze = self.squeeze(pool) - squeeze = F.relu(squeeze) - excitation = self.excitation(squeeze) - excitation = paddle.clip(x=excitation, min=0, max=1) - excitation = paddle.unsqueeze(excitation, axis=[2, 3]) - out = paddle.multiply(inputs, excitation) - return out - - -class GhostModule(nn.Layer): - def __init__( - self, - in_channels, - output_channels, - kernel_size=1, - ratio=2, - dw_size=3, - stride=1, - relu=True, - name=None, - ): - super(GhostModule, self).__init__() - init_channels = int(math.ceil(output_channels / ratio)) - new_channels = int(init_channels * (ratio - 1)) - self.primary_conv = ConvBNLayer( - in_channels=in_channels, - out_channels=init_channels, - kernel_size=kernel_size, - stride=stride, - groups=1, - act="relu" if relu else None, - name=name + "_primary_conv", - ) - self.cheap_operation = ConvBNLayer( - in_channels=init_channels, - out_channels=new_channels, - kernel_size=dw_size, - stride=1, - groups=init_channels, - act="relu" if relu else None, - name=name + "_cheap_operation", - ) - - def forward(self, inputs): - x = self.primary_conv(inputs) - y = self.cheap_operation(x) - out = paddle.concat([x, y], axis=1) - return out - - -class GhostBottleneck(nn.Layer): - def __init__( - self, - in_channels, - hidden_dim, - output_channels, - kernel_size, - stride, - use_se, - name=None, - ): - super(GhostBottleneck, self).__init__() - self._stride = stride - self._use_se = use_se - self._num_channels = in_channels - self._output_channels = output_channels - self.ghost_module_1 = GhostModule( - in_channels=in_channels, - output_channels=hidden_dim, - kernel_size=1, - stride=1, - relu=True, - name=name + "_ghost_module_1", - ) - if stride == 2: - self.depthwise_conv = ConvBNLayer( - in_channels=hidden_dim, - out_channels=hidden_dim, - kernel_size=kernel_size, - stride=stride, - groups=hidden_dim, - act=None, - name=name - + "_depthwise_depthwise", # looks strange due to an old typo, will be fixed later. - ) - if use_se: - self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se") - self.ghost_module_2 = GhostModule( - in_channels=hidden_dim, - output_channels=output_channels, - kernel_size=1, - relu=False, - name=name + "_ghost_module_2", - ) - if stride != 1 or in_channels != output_channels: - self.shortcut_depthwise = ConvBNLayer( - in_channels=in_channels, - out_channels=in_channels, - kernel_size=kernel_size, - stride=stride, - groups=in_channels, - act=None, - name=name - + "_shortcut_depthwise_depthwise", # looks strange due to an old typo, will be fixed later. 
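-                # shortcut path (used when stride != 1 or channels change):
-                # depthwise conv then 1x1 pointwise conv, added to the main path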
- ) - self.shortcut_conv = ConvBNLayer( - in_channels=in_channels, - out_channels=output_channels, - kernel_size=1, - stride=1, - groups=1, - act=None, - name=name + "_shortcut_conv", - ) - - def forward(self, inputs): - x = self.ghost_module_1(inputs) - if self._stride == 2: - x = self.depthwise_conv(x) - if self._use_se: - x = self.se_block(x) - x = self.ghost_module_2(x) - if self._stride == 1 and self._num_channels == self._output_channels: - shortcut = inputs - else: - shortcut = self.shortcut_depthwise(inputs) - shortcut = self.shortcut_conv(shortcut) - return paddle.add(x=x, y=shortcut) - - -class GhostNet(nn.Layer): - def __init__(self, scale, class_dim=1000): - super(GhostNet, self).__init__() - self.cfgs = [ - # k, t, c, SE, s - [3, 16, 16, 0, 1], - [3, 48, 24, 0, 2], - [3, 72, 24, 0, 1], - [5, 72, 40, 1, 2], - [5, 120, 40, 1, 1], - [3, 240, 80, 0, 2], - [3, 200, 80, 0, 1], - [3, 184, 80, 0, 1], - [3, 184, 80, 0, 1], - [3, 480, 112, 1, 1], - [3, 672, 112, 1, 1], - [5, 672, 160, 1, 2], - [5, 960, 160, 0, 1], - [5, 960, 160, 1, 1], - [5, 960, 160, 0, 1], - [5, 960, 160, 1, 1], - ] - self.scale = scale - output_channels = int(self._make_divisible(16 * self.scale, 4)) - self.conv1 = ConvBNLayer( - in_channels=3, - out_channels=output_channels, - kernel_size=3, - stride=2, - groups=1, - act="relu", - name="conv1", - ) - # build inverted residual blocks - idx = 0 - self.ghost_bottleneck_list = [] - for k, exp_size, c, use_se, s in self.cfgs: - in_channels = output_channels - output_channels = int(self._make_divisible(c * self.scale, 4)) - hidden_dim = int(self._make_divisible(exp_size * self.scale, 4)) - ghost_bottleneck = self.add_sublayer( - name="_ghostbottleneck_" + str(idx), - sublayer=GhostBottleneck( - in_channels=in_channels, - hidden_dim=hidden_dim, - output_channels=output_channels, - kernel_size=k, - stride=s, - use_se=use_se, - name="_ghostbottleneck_" + str(idx), - ), - ) - self.ghost_bottleneck_list.append(ghost_bottleneck) - idx += 1 - # build last several layers - in_channels = output_channels - output_channels = int(self._make_divisible(exp_size * self.scale, 4)) - self.conv_last = ConvBNLayer( - in_channels=in_channels, - out_channels=output_channels, - kernel_size=1, - stride=1, - groups=1, - act="relu", - name="conv_last", - ) - self.pool2d_gap = AdaptiveAvgPool2D(1) - in_channels = output_channels - self._fc0_output_channels = 1280 - self.fc_0 = ConvBNLayer( - in_channels=in_channels, - out_channels=self._fc0_output_channels, - kernel_size=1, - stride=1, - act="relu", - name="fc_0", - ) - self.dropout = nn.Dropout(p=0.2) - stdv = 1.0 / math.sqrt(self._fc0_output_channels * 1.0) - self.fc_1 = Linear( - self._fc0_output_channels, - class_dim, - weight_attr=ParamAttr( - name="fc_1_weights", initializer=Uniform(-stdv, stdv) - ), - bias_attr=ParamAttr(name="fc_1_offset"), - ) - - def forward(self, inputs): - x = self.conv1(inputs) - for ghost_bottleneck in self.ghost_bottleneck_list: - x = ghost_bottleneck(x) - x = self.conv_last(x) - x = self.pool2d_gap(x) - x = self.fc_0(x) - x = self.dropout(x) - x = paddle.reshape(x, shape=[-1, self._fc0_output_channels]) - x = self.fc_1(x) - return x - - def _make_divisible(self, v, divisor, min_value=None): - """ - This function is taken from the original tf repo. 
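-        e.g. with scale 1.3, _make_divisible(16 * 1.3, 4) rounds 20.8 to the
-        nearest multiple of 4, giving 20.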
- It ensures that all layers have a channel number that is divisible by 8 - It can be seen here: - https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py - """ - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -def GhostNet_x0_5(**args): - model = GhostNet(scale=0.5) - return model - - -def GhostNet_x1_0(**args): - model = GhostNet(scale=1.0) - return model - - -def GhostNet_x1_3(**args): - model = GhostNet(scale=1.3) - return model - - -def test_GhostNet_x0_5(): - load_paddle_module_and_check( - GhostNet_x0_5, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_googlenet.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_googlenet.py deleted file mode 100644 index d94bc5255b668ed0866479c974f2e6282625d0c2..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_googlenet.py +++ /dev/null @@ -1,226 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform - -import math - -__all__ = ["GoogLeNet"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def xavier(channels, filter_size, name): - stdv = (3.0 / (filter_size ** 2 * channels)) ** 0.5 - param_attr = ParamAttr(initializer=Uniform(-stdv, stdv), name=name + "_weights") - return param_attr - - -class ConvLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - ): - super(ConvLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - - def forward(self, inputs): - y = self._conv(inputs) - return y - - -class Inception(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - filter1, - filter3R, - filter3, - filter5R, - filter5, - proj, - name=None, - ): - super(Inception, self).__init__() - - self._conv1 = ConvLayer( - input_channels, filter1, 1, name="inception_" + name + "_1x1" - ) - self._conv3r = ConvLayer( - input_channels, filter3R, 1, name="inception_" + name + "_3x3_reduce" - ) - self._conv3 = ConvLayer(filter3R, filter3, 3, name="inception_" + name + "_3x3") - self._conv5r = ConvLayer( - input_channels, filter5R, 1, name="inception_" + name + "_5x5_reduce" - ) - self._conv5 = ConvLayer(filter5R, filter5, 5, name="inception_" + name + "_5x5") - self._pool = MaxPool2D(kernel_size=3, stride=1, padding=1) - - self._convprj = ConvLayer( - input_channels, proj, 1, name="inception_" + name + "_3x3_proj" - ) - - def forward(self, inputs): - conv1 = self._conv1(inputs) - - conv3r = self._conv3r(inputs) - conv3 = self._conv3(conv3r) - - conv5r = self._conv5r(inputs) - conv5 = self._conv5(conv5r) - - pool = self._pool(inputs) - convprj = self._convprj(pool) - - cat = paddle.concat([conv1, conv3, conv5, convprj], axis=1) - cat = F.relu(cat) - return cat - - -class GoogLeNetDY(nn.Layer): - def __init__(self, class_dim=1000): - super(GoogLeNetDY, self).__init__() - self._conv = ConvLayer(3, 64, 7, 2, name="conv1") - self._pool = MaxPool2D(kernel_size=3, stride=2) - self._conv_1 = ConvLayer(64, 64, 1, name="conv2_1x1") - self._conv_2 = ConvLayer(64, 192, 3, name="conv2_3x3") - - self._ince3a = Inception(192, 192, 64, 96, 128, 16, 32, 32, name="ince3a") - self._ince3b = Inception(256, 256, 128, 128, 192, 32, 96, 64, name="ince3b") - - self._ince4a = Inception(480, 480, 192, 96, 208, 16, 48, 64, name="ince4a") - self._ince4b = Inception(512, 512, 160, 112, 224, 24, 64, 64, name="ince4b") - self._ince4c = Inception(512, 512, 128, 128, 256, 24, 64, 64, name="ince4c") - self._ince4d = Inception(512, 512, 112, 144, 288, 32, 64, 64, name="ince4d") - self._ince4e = Inception(528, 528, 256, 160, 320, 32, 128, 128, name="ince4e") - - self._ince5a = Inception(832, 832, 256, 160, 320, 32, 128, 128, name="ince5a") - self._ince5b = Inception(832, 832, 384, 192, 384, 48, 128, 128, name="ince5b") - - self._pool_5 = AvgPool2D(kernel_size=7, stride=7) - - self._drop = Dropout(p=0.4, mode="downscale_in_infer") - self._fc_out = Linear( - 1024, - class_dim, - weight_attr=xavier(1024, 1, "out"), - 
bias_attr=ParamAttr(name="out_offset"), - ) - self._pool_o1 = AvgPool2D(kernel_size=5, stride=3) - self._conv_o1 = ConvLayer(512, 128, 1, name="conv_o1") - self._fc_o1 = Linear( - 1152, - 1024, - weight_attr=xavier(2048, 1, "fc_o1"), - bias_attr=ParamAttr(name="fc_o1_offset"), - ) - self._drop_o1 = Dropout(p=0.7, mode="downscale_in_infer") - self._out1 = Linear( - 1024, - class_dim, - weight_attr=xavier(1024, 1, "out1"), - bias_attr=ParamAttr(name="out1_offset"), - ) - self._pool_o2 = AvgPool2D(kernel_size=5, stride=3) - self._conv_o2 = ConvLayer(528, 128, 1, name="conv_o2") - self._fc_o2 = Linear( - 1152, - 1024, - weight_attr=xavier(2048, 1, "fc_o2"), - bias_attr=ParamAttr(name="fc_o2_offset"), - ) - self._drop_o2 = Dropout(p=0.7, mode="downscale_in_infer") - self._out2 = Linear( - 1024, - class_dim, - weight_attr=xavier(1024, 1, "out2"), - bias_attr=ParamAttr(name="out2_offset"), - ) - - def forward(self, inputs): - x = self._conv(inputs) - x = self._pool(x) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._pool(x) - - x = self._ince3a(x) - x = self._ince3b(x) - x = self._pool(x) - - ince4a = self._ince4a(x) - x = self._ince4b(ince4a) - x = self._ince4c(x) - ince4d = self._ince4d(x) - x = self._ince4e(ince4d) - x = self._pool(x) - - x = self._ince5a(x) - ince5b = self._ince5b(x) - - x = self._pool_5(ince5b) - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - out = self._fc_out(x) - - # x = self._pool_o1(ince4a) - # x = self._conv_o1(x) - # x = paddle.flatten(x, start_axis=1, stop_axis=-1) - # x = self._fc_o1(x) - # x = F.relu(x) - # x = self._drop_o1(x) - # out1 = self._out1(x) - - # x = self._pool_o2(ince4d) - # x = self._conv_o2(x) - # x = paddle.flatten(x, start_axis=1, stop_axis=-1) - # x = self._fc_o2(x) - # x = self._drop_o2(x) - # out2 = self._out2(x) - return out - - -def GoogLeNet(**args): - model = GoogLeNetDY(**args) - return model - - -def test_GoogLeNet(): - load_paddle_module_and_check( - GoogLeNet, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_inceptionv3.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_inceptionv3.py deleted file mode 100644 index d13bdd323804211ac5f0c2af39e1005f1e496031..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_inceptionv3.py +++ /dev/null @@ -1,598 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = ["InceptionV3"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - padding=0, - groups=1, - act="relu", - name=None, - ): - super(ConvBNLayer, self).__init__() - - self.conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - self.batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name=name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, inputs): - y = self.conv(inputs) - y = self.batch_norm(y) - return y - - -class InceptionStem(nn.Layer): - def __init__(self): - super(InceptionStem, self).__init__() - self.conv_1a_3x3 = ConvBNLayer( - num_channels=3, - num_filters=32, - filter_size=3, - stride=2, - act="relu", - name="conv_1a_3x3", - ) - self.conv_2a_3x3 = ConvBNLayer( - num_channels=32, - num_filters=32, - filter_size=3, - stride=1, - act="relu", - name="conv_2a_3x3", - ) - self.conv_2b_3x3 = ConvBNLayer( - num_channels=32, - num_filters=64, - filter_size=3, - padding=1, - act="relu", - name="conv_2b_3x3", - ) - self.maxpool = MaxPool2D(kernel_size=3, stride=2, padding=0) - self.conv_3b_1x1 = ConvBNLayer( - num_channels=64, - num_filters=80, - filter_size=1, - act="relu", - name="conv_3b_1x1", - ) - self.conv_4a_3x3 = ConvBNLayer( - num_channels=80, - num_filters=192, - filter_size=3, - act="relu", - name="conv_4a_3x3", - ) - - def forward(self, x): - y = self.conv_1a_3x3(x) - y = self.conv_2a_3x3(y) - y = self.conv_2b_3x3(y) - y = self.maxpool(y) - y = self.conv_3b_1x1(y) - y = self.conv_4a_3x3(y) - y = self.maxpool(y) - return y - - -class InceptionA(nn.Layer): - def __init__(self, num_channels, pool_features, name=None): - super(InceptionA, self).__init__() - self.branch1x1 = ConvBNLayer( - num_channels=num_channels, - num_filters=64, - filter_size=1, - act="relu", - name="inception_a_branch1x1_" + name, - ) - self.branch5x5_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=48, - filter_size=1, - act="relu", - name="inception_a_branch5x5_1_" + name, - ) - self.branch5x5_2 = ConvBNLayer( - num_channels=48, - num_filters=64, - filter_size=5, - padding=2, - act="relu", - name="inception_a_branch5x5_2_" + name, - ) - - self.branch3x3dbl_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=64, - filter_size=1, - act="relu", - name="inception_a_branch3x3dbl_1_" + name, - ) - self.branch3x3dbl_2 = ConvBNLayer( - num_channels=64, - num_filters=96, - filter_size=3, - padding=1, - act="relu", - name="inception_a_branch3x3dbl_2_" + name, - ) - self.branch3x3dbl_3 = ConvBNLayer( - num_channels=96, - num_filters=96, - filter_size=3, - padding=1, - act="relu", - name="inception_a_branch3x3dbl_3_" + name, - ) - self.branch_pool = AvgPool2D( - kernel_size=3, stride=1, padding=1, exclusive=False - ) - self.branch_pool_conv = 
ConvBNLayer( - num_channels=num_channels, - num_filters=pool_features, - filter_size=1, - act="relu", - name="inception_a_branch_pool_" + name, - ) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - branch5x5 = self.branch5x5_1(x) - branch5x5 = self.branch5x5_2(branch5x5) - - branch3x3dbl = self.branch3x3dbl_1(x) - branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) - branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) - - branch_pool = self.branch_pool(x) - - branch_pool = self.branch_pool_conv(branch_pool) - outputs = paddle.concat( - [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=1 - ) - return outputs - - -class InceptionB(nn.Layer): - def __init__(self, num_channels, name=None): - super(InceptionB, self).__init__() - self.branch3x3 = ConvBNLayer( - num_channels=num_channels, - num_filters=384, - filter_size=3, - stride=2, - act="relu", - name="inception_b_branch3x3_" + name, - ) - self.branch3x3dbl_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=64, - filter_size=1, - act="relu", - name="inception_b_branch3x3dbl_1_" + name, - ) - self.branch3x3dbl_2 = ConvBNLayer( - num_channels=64, - num_filters=96, - filter_size=3, - padding=1, - act="relu", - name="inception_b_branch3x3dbl_2_" + name, - ) - self.branch3x3dbl_3 = ConvBNLayer( - num_channels=96, - num_filters=96, - filter_size=3, - stride=2, - act="relu", - name="inception_b_branch3x3dbl_3_" + name, - ) - self.branch_pool = MaxPool2D(kernel_size=3, stride=2) - - def forward(self, x): - branch3x3 = self.branch3x3(x) - - branch3x3dbl = self.branch3x3dbl_1(x) - branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) - branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) - - branch_pool = self.branch_pool(x) - - outputs = paddle.concat([branch3x3, branch3x3dbl, branch_pool], axis=1) - - return outputs - - -class InceptionC(nn.Layer): - def __init__(self, num_channels, channels_7x7, name=None): - super(InceptionC, self).__init__() - self.branch1x1 = ConvBNLayer( - num_channels=num_channels, - num_filters=192, - filter_size=1, - act="relu", - name="inception_c_branch1x1_" + name, - ) - self.branch7x7_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=channels_7x7, - filter_size=1, - stride=1, - act="relu", - name="inception_c_branch7x7_1_" + name, - ) - self.branch7x7_2 = ConvBNLayer( - num_channels=channels_7x7, - num_filters=channels_7x7, - filter_size=(1, 7), - stride=1, - padding=(0, 3), - act="relu", - name="inception_c_branch7x7_2_" + name, - ) - self.branch7x7_3 = ConvBNLayer( - num_channels=channels_7x7, - num_filters=192, - filter_size=(7, 1), - stride=1, - padding=(3, 0), - act="relu", - name="inception_c_branch7x7_3_" + name, - ) - - self.branch7x7dbl_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=channels_7x7, - filter_size=1, - act="relu", - name="inception_c_branch7x7dbl_1_" + name, - ) - self.branch7x7dbl_2 = ConvBNLayer( - num_channels=channels_7x7, - num_filters=channels_7x7, - filter_size=(7, 1), - padding=(3, 0), - act="relu", - name="inception_c_branch7x7dbl_2_" + name, - ) - self.branch7x7dbl_3 = ConvBNLayer( - num_channels=channels_7x7, - num_filters=channels_7x7, - filter_size=(1, 7), - padding=(0, 3), - act="relu", - name="inception_c_branch7x7dbl_3_" + name, - ) - self.branch7x7dbl_4 = ConvBNLayer( - num_channels=channels_7x7, - num_filters=channels_7x7, - filter_size=(7, 1), - padding=(3, 0), - act="relu", - name="inception_c_branch7x7dbl_4_" + name, - ) - self.branch7x7dbl_5 = ConvBNLayer( - num_channels=channels_7x7, - num_filters=192, - filter_size=(1, 7), - padding=(0, 3), - 
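-            # the stacked (1,7)/(7,1) kernels factorize a 7x7 conv more cheaply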
act="relu", - name="inception_c_branch7x7dbl_5_" + name, - ) - - self.branch_pool = AvgPool2D( - kernel_size=3, stride=1, padding=1, exclusive=False - ) - self.branch_pool_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=192, - filter_size=1, - act="relu", - name="inception_c_branch_pool_" + name, - ) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - - branch7x7 = self.branch7x7_1(x) - branch7x7 = self.branch7x7_2(branch7x7) - branch7x7 = self.branch7x7_3(branch7x7) - - branch7x7dbl = self.branch7x7dbl_1(x) - branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) - - branch_pool = self.branch_pool(x) - branch_pool = self.branch_pool_conv(branch_pool) - - outputs = paddle.concat( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=1 - ) - - return outputs - - -class InceptionD(nn.Layer): - def __init__(self, num_channels, name=None): - super(InceptionD, self).__init__() - self.branch3x3_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=192, - filter_size=1, - act="relu", - name="inception_d_branch3x3_1_" + name, - ) - self.branch3x3_2 = ConvBNLayer( - num_channels=192, - num_filters=320, - filter_size=3, - stride=2, - act="relu", - name="inception_d_branch3x3_2_" + name, - ) - self.branch7x7x3_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=192, - filter_size=1, - act="relu", - name="inception_d_branch7x7x3_1_" + name, - ) - self.branch7x7x3_2 = ConvBNLayer( - num_channels=192, - num_filters=192, - filter_size=(1, 7), - padding=(0, 3), - act="relu", - name="inception_d_branch7x7x3_2_" + name, - ) - self.branch7x7x3_3 = ConvBNLayer( - num_channels=192, - num_filters=192, - filter_size=(7, 1), - padding=(3, 0), - act="relu", - name="inception_d_branch7x7x3_3_" + name, - ) - self.branch7x7x3_4 = ConvBNLayer( - num_channels=192, - num_filters=192, - filter_size=3, - stride=2, - act="relu", - name="inception_d_branch7x7x3_4_" + name, - ) - self.branch_pool = MaxPool2D(kernel_size=3, stride=2) - - def forward(self, x): - branch3x3 = self.branch3x3_1(x) - branch3x3 = self.branch3x3_2(branch3x3) - - branch7x7x3 = self.branch7x7x3_1(x) - branch7x7x3 = self.branch7x7x3_2(branch7x7x3) - branch7x7x3 = self.branch7x7x3_3(branch7x7x3) - branch7x7x3 = self.branch7x7x3_4(branch7x7x3) - - branch_pool = self.branch_pool(x) - - outputs = paddle.concat([branch3x3, branch7x7x3, branch_pool], axis=1) - return outputs - - -class InceptionE(nn.Layer): - def __init__(self, num_channels, name=None): - super(InceptionE, self).__init__() - self.branch1x1 = ConvBNLayer( - num_channels=num_channels, - num_filters=320, - filter_size=1, - act="relu", - name="inception_e_branch1x1_" + name, - ) - self.branch3x3_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=384, - filter_size=1, - act="relu", - name="inception_e_branch3x3_1_" + name, - ) - self.branch3x3_2a = ConvBNLayer( - num_channels=384, - num_filters=384, - filter_size=(1, 3), - padding=(0, 1), - act="relu", - name="inception_e_branch3x3_2a_" + name, - ) - self.branch3x3_2b = ConvBNLayer( - num_channels=384, - num_filters=384, - filter_size=(3, 1), - padding=(1, 0), - act="relu", - name="inception_e_branch3x3_2b_" + name, - ) - - self.branch3x3dbl_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=448, - filter_size=1, - act="relu", - name="inception_e_branch3x3dbl_1_" + name, - ) - self.branch3x3dbl_2 = ConvBNLayer( - num_channels=448, - 
num_filters=384, - filter_size=3, - padding=1, - act="relu", - name="inception_e_branch3x3dbl_2_" + name, - ) - self.branch3x3dbl_3a = ConvBNLayer( - num_channels=384, - num_filters=384, - filter_size=(1, 3), - padding=(0, 1), - act="relu", - name="inception_e_branch3x3dbl_3a_" + name, - ) - self.branch3x3dbl_3b = ConvBNLayer( - num_channels=384, - num_filters=384, - filter_size=(3, 1), - padding=(1, 0), - act="relu", - name="inception_e_branch3x3dbl_3b_" + name, - ) - self.branch_pool = AvgPool2D( - kernel_size=3, stride=1, padding=1, exclusive=False - ) - self.branch_pool_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=192, - filter_size=1, - act="relu", - name="inception_e_branch_pool_" + name, - ) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - - branch3x3 = self.branch3x3_1(x) - branch3x3 = [ - self.branch3x3_2a(branch3x3), - self.branch3x3_2b(branch3x3), - ] - branch3x3 = paddle.concat(branch3x3, axis=1) - - branch3x3dbl = self.branch3x3dbl_1(x) - branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) - branch3x3dbl = [ - self.branch3x3dbl_3a(branch3x3dbl), - self.branch3x3dbl_3b(branch3x3dbl), - ] - branch3x3dbl = paddle.concat(branch3x3dbl, axis=1) - - branch_pool = self.branch_pool(x) - branch_pool = self.branch_pool_conv(branch_pool) - - outputs = paddle.concat( - [branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=1 - ) - return outputs - - -class InceptionV3(nn.Layer): - def __init__(self, class_dim=1000): - super(InceptionV3, self).__init__() - self.inception_a_list = [[192, 256, 288], [32, 64, 64]] - self.inception_c_list = [[768, 768, 768, 768], [128, 160, 160, 192]] - - self.inception_stem = InceptionStem() - self.inception_block_list = [] - for i in range(len(self.inception_a_list[0])): - inception_a = self.add_sublayer( - "inception_a_" + str(i + 1), - InceptionA( - self.inception_a_list[0][i], - self.inception_a_list[1][i], - name=str(i + 1), - ), - ) - self.inception_block_list.append(inception_a) - inception_b = self.add_sublayer("nception_b_1", InceptionB(288, name="1")) - self.inception_block_list.append(inception_b) - - for i in range(len(self.inception_c_list[0])): - inception_c = self.add_sublayer( - "inception_c_" + str(i + 1), - InceptionC( - self.inception_c_list[0][i], - self.inception_c_list[1][i], - name=str(i + 1), - ), - ) - self.inception_block_list.append(inception_c) - inception_d = self.add_sublayer("inception_d_1", InceptionD(768, name="1")) - self.inception_block_list.append(inception_d) - inception_e = self.add_sublayer("inception_e_1", InceptionE(1280, name="1")) - self.inception_block_list.append(inception_e) - inception_e = self.add_sublayer("inception_e_2", InceptionE(2048, name="2")) - self.inception_block_list.append(inception_e) - - self.gap = AdaptiveAvgPool2D(1) - self.drop = Dropout(p=0.2, mode="downscale_in_infer") - stdv = 1.0 / math.sqrt(2048 * 1.0) - self.out = Linear( - 2048, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, x): - y = self.inception_stem(x) - for inception_block in self.inception_block_list: - y = inception_block(y) - y = self.gap(y) - y = paddle.reshape(y, shape=[-1, 2048]) - y = self.drop(y) - y = self.out(y) - return y - - -def test_InceptionV3(): - load_paddle_module_and_check( - InceptionV3, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_inceptionv4.py 
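The branch widths in the InceptionV3 definition above have to line up across blocks for the `paddle.concat` calls to type-check in ONNX export. A few lines of plain Python (a sketch, not part of the deleted file) make the bookkeeping explicit:

```python
# InceptionA concatenates four branches: 64 (1x1) + 64 (5x5) + 96 (double 3x3)
# channels, plus pool_features channels from the pooled branch.
def inception_a_out_channels(pool_features):
    return 64 + 64 + 96 + pool_features

in_channels = [192, 256, 288]   # inception_a_list[0] above
pool_features = [32, 64, 64]    # inception_a_list[1] above
for c_in, pf in zip(in_channels, pool_features):
    print(c_in, "->", inception_a_out_channels(pf))
# 192 -> 256, 256 -> 288, 288 -> 288: each block's output width matches the
# next block's num_channels, ending at 288, which is what InceptionB(288) expects.
```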
diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_inceptionv4.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_inceptionv4.py
deleted file mode 100644
index ed439e251426058f2db5d65472a5351f04be2ba3..0000000000000000000000000000000000000000
--- a/examples/x2oneflow/paddle2oneflow/code_gen/test_inceptionv4.py
+++ /dev/null
@@ -1,481 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import paddle
-from paddle import ParamAttr
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
-from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
-from paddle.nn.initializer import Uniform
-import math
-
-__all__ = ["InceptionV4"]
-
-from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check
-
-
-class ConvBNLayer(nn.Layer):
-    def __init__(
-        self,
-        num_channels,
-        num_filters,
-        filter_size,
-        stride=1,
-        padding=0,
-        groups=1,
-        act="relu",
-        name=None,
-    ):
-        super(ConvBNLayer, self).__init__()
-
-        self._conv = Conv2D(
-            in_channels=num_channels,
-            out_channels=num_filters,
-            kernel_size=filter_size,
-            stride=stride,
-            padding=padding,
-            groups=groups,
-            weight_attr=ParamAttr(name=name + "_weights"),
-            bias_attr=False,
-        )
-        bn_name = name + "_bn"
-        self._batch_norm = BatchNorm(
-            num_filters,
-            act=act,
-            param_attr=ParamAttr(name=bn_name + "_scale"),
-            bias_attr=ParamAttr(name=bn_name + "_offset"),
-            moving_mean_name=bn_name + "_mean",
-            moving_variance_name=bn_name + "_variance",
-        )
-
-    def forward(self, inputs):
-        y = self._conv(inputs)
-        y = self._batch_norm(y)
-        return y
-
-
-class InceptionStem(nn.Layer):
-    def __init__(self):
-        super(InceptionStem, self).__init__()
-        self._conv_1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name="conv1_3x3_s2")
-        self._conv_2 = ConvBNLayer(32, 32, 3, act="relu", name="conv2_3x3_s1")
-        self._conv_3 = ConvBNLayer(
-            32, 64, 3, padding=1, act="relu", name="conv3_3x3_s1"
-        )
-        self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
-        self._conv2 = ConvBNLayer(
-            64, 96, 3, stride=2, act="relu", name="inception_stem1_3x3_s2"
-        )
-        self._conv1_1 = ConvBNLayer(
-            160, 64, 1, act="relu", name="inception_stem2_3x3_reduce"
-        )
-        self._conv1_2 = ConvBNLayer(64, 96, 3, act="relu", name="inception_stem2_3x3")
-        self._conv2_1 = ConvBNLayer(
-            160, 64, 1, act="relu", name="inception_stem2_1x7_reduce"
-        )
-        self._conv2_2 = ConvBNLayer(
-            64, 64, (7, 1), padding=(3, 0), act="relu", name="inception_stem2_1x7"
-        )
-        self._conv2_3 = ConvBNLayer(
-            64, 64, (1, 7), padding=(0, 3), act="relu", name="inception_stem2_7x1"
-        )
-        self._conv2_4 = ConvBNLayer(64, 96, 3, act="relu", name="inception_stem2_3x3_2")
-        self._conv3 = ConvBNLayer(
-            192, 192, 3, stride=2, act="relu", name="inception_stem3_3x3_s2"
-        )
-
-    def forward(self, inputs):
-        conv = self._conv_1(inputs)
-        conv = self._conv_2(conv)
-        conv = self._conv_3(conv)
-
-        pool1 = self._pool(conv)
-        conv2 = self._conv2(conv)
-        concat = paddle.concat([pool1, conv2], axis=1)
-
-        conv1 = self._conv1_1(concat)
-        conv1 = self._conv1_2(conv1)
-
-        conv2 = self._conv2_1(concat)
-        conv2 = self._conv2_2(conv2)
-        conv2 = self._conv2_3(conv2)
-        conv2 = self._conv2_4(conv2)
-
-        concat = paddle.concat([conv1, conv2], axis=1)
-
-        conv1 = self._conv3(concat)
-        pool1 = self._pool(concat)
-
-        concat = paddle.concat([conv1, pool1], axis=1)
-        return concat
-
-
-class InceptionA(nn.Layer):
-    def __init__(self, name):
-        super(InceptionA, self).__init__()
-        self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
-        self._conv1 = ConvBNLayer(
-            384, 96, 1, act="relu", name="inception_a" + name + "_1x1"
-        )
-        self._conv2 = ConvBNLayer(
-            384, 96, 1, act="relu", name="inception_a" + name + "_1x1_2"
-        )
-        self._conv3_1 = ConvBNLayer(
-            384, 64, 1, act="relu", name="inception_a" + name + "_3x3_reduce"
-        )
-        self._conv3_2 = ConvBNLayer(
-            64, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3"
-        )
-        self._conv4_1 = ConvBNLayer(
-            384, 64, 1, act="relu", name="inception_a" + name + "_3x3_2_reduce"
-        )
-        self._conv4_2 = ConvBNLayer(
-            64, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3_2"
-        )
-        self._conv4_3 = ConvBNLayer(
-            96, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3_3"
-        )
-
-    def forward(self, inputs):
-        pool1 = self._pool(inputs)
-        conv1 = self._conv1(pool1)
-
-        conv2 = self._conv2(inputs)
-
-        conv3 = self._conv3_1(inputs)
-        conv3 = self._conv3_2(conv3)
-
-        conv4 = self._conv4_1(inputs)
-        conv4 = self._conv4_2(conv4)
-        conv4 = self._conv4_3(conv4)
-
-        concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1)
-        return concat
-
-
-class ReductionA(nn.Layer):
-    def __init__(self):
-        super(ReductionA, self).__init__()
-        self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
-        self._conv2 = ConvBNLayer(
-            384, 384, 3, stride=2, act="relu", name="reduction_a_3x3"
-        )
-        self._conv3_1 = ConvBNLayer(
-            384, 192, 1, act="relu", name="reduction_a_3x3_2_reduce"
-        )
-        self._conv3_2 = ConvBNLayer(
-            192, 224, 3, padding=1, act="relu", name="reduction_a_3x3_2"
-        )
-        self._conv3_3 = ConvBNLayer(
-            224, 256, 3, stride=2, act="relu", name="reduction_a_3x3_3"
-        )
-
-    def forward(self, inputs):
-        pool1 = self._pool(inputs)
-        conv2 = self._conv2(inputs)
-        conv3 = self._conv3_1(inputs)
-        conv3 = self._conv3_2(conv3)
-        conv3 = self._conv3_3(conv3)
-        concat = paddle.concat([pool1, conv2, conv3], axis=1)
-        return concat
-
-
-class InceptionB(nn.Layer):
-    def __init__(self, name=None):
-        super(InceptionB, self).__init__()
-        self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
-        self._conv1 = ConvBNLayer(
-            1024, 128, 1, act="relu", name="inception_b" + name + "_1x1"
-        )
-        self._conv2 = ConvBNLayer(
-            1024, 384, 1, act="relu", name="inception_b" + name + "_1x1_2"
-        )
-        self._conv3_1 = ConvBNLayer(
-            1024, 192, 1, act="relu", name="inception_b" + name + "_1x7_reduce"
-        )
-        self._conv3_2 = ConvBNLayer(
-            192,
-            224,
-            (1, 7),
-            padding=(0, 3),
-            act="relu",
-            name="inception_b" + name + "_1x7",
-        )
-        self._conv3_3 = ConvBNLayer(
-            224,
-            256,
-            (7, 1),
-            padding=(3, 0),
-            act="relu",
-            name="inception_b" + name + "_7x1",
-        )
-        self._conv4_1 = ConvBNLayer(
-            1024, 192, 1, act="relu", name="inception_b" + name + "_7x1_2_reduce"
-        )
-        self._conv4_2 = ConvBNLayer(
-            192,
-            192,
-            (1, 7),
-            padding=(0, 3),
-            act="relu",
-            name="inception_b" + name + "_1x7_2",
-        )
-        self._conv4_3 = ConvBNLayer(
-            192,
-            224,
-            (7, 1),
-            padding=(3, 0),
-            act="relu",
-            name="inception_b" + name + "_7x1_2",
-        )
-        self._conv4_4 = ConvBNLayer(
-            224,
-            224,
-            (1, 7),
-            padding=(0, 3),
-            act="relu",
-            name="inception_b" + name + "_1x7_3",
-        )
-        self._conv4_5 = ConvBNLayer(
-            224,
-            256,
-            (7, 1),
-            padding=(3, 0),
-            act="relu",
-            name="inception_b" + name + "_7x1_3",
-        )
-
-    def forward(self, inputs):
-        pool1 = self._pool(inputs)
-        conv1 = self._conv1(pool1)
-
-        conv2 = self._conv2(inputs)
-
-        conv3 = self._conv3_1(inputs)
-        conv3 = self._conv3_2(conv3)
-        conv3 = self._conv3_3(conv3)
-
-        conv4 = self._conv4_1(inputs)
-        conv4 = self._conv4_2(conv4)
-        conv4 = self._conv4_3(conv4)
-        conv4 = self._conv4_4(conv4)
-        conv4 = self._conv4_5(conv4)
-
-        concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1)
-        return concat
-
-
-class ReductionB(nn.Layer):
-    def __init__(self):
-        super(ReductionB, self).__init__()
-        self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
-        self._conv2_1 = ConvBNLayer(
-            1024, 192, 1, act="relu", name="reduction_b_3x3_reduce"
-        )
-        self._conv2_2 = ConvBNLayer(
-            192, 192, 3, stride=2, act="relu", name="reduction_b_3x3"
-        )
-        self._conv3_1 = ConvBNLayer(
-            1024, 256, 1, act="relu", name="reduction_b_1x7_reduce"
-        )
-        self._conv3_2 = ConvBNLayer(
-            256, 256, (1, 7), padding=(0, 3), act="relu", name="reduction_b_1x7"
-        )
-        self._conv3_3 = ConvBNLayer(
-            256, 320, (7, 1), padding=(3, 0), act="relu", name="reduction_b_7x1"
-        )
-        self._conv3_4 = ConvBNLayer(
-            320, 320, 3, stride=2, act="relu", name="reduction_b_3x3_2"
-        )
-
-    def forward(self, inputs):
-        pool1 = self._pool(inputs)
-
-        conv2 = self._conv2_1(inputs)
-        conv2 = self._conv2_2(conv2)
-
-        conv3 = self._conv3_1(inputs)
-        conv3 = self._conv3_2(conv3)
-        conv3 = self._conv3_3(conv3)
-        conv3 = self._conv3_4(conv3)
-
-        concat = paddle.concat([pool1, conv2, conv3], axis=1)
-
-        return concat
-
-
-class InceptionC(nn.Layer):
-    def __init__(self, name=None):
-        super(InceptionC, self).__init__()
-        self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
-        self._conv1 = ConvBNLayer(
-            1536, 256, 1, act="relu", name="inception_c" + name + "_1x1"
-        )
-        self._conv2 = ConvBNLayer(
-            1536, 256, 1, act="relu", name="inception_c" + name + "_1x1_2"
-        )
-        self._conv3_0 = ConvBNLayer(
-            1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_3"
-        )
-        self._conv3_1 = ConvBNLayer(
-            384,
-            256,
-            (1, 3),
-            padding=(0, 1),
-            act="relu",
-            name="inception_c" + name + "_1x3",
-        )
-        self._conv3_2 = ConvBNLayer(
-            384,
-            256,
-            (3, 1),
-            padding=(1, 0),
-            act="relu",
-            name="inception_c" + name + "_3x1",
-        )
-        self._conv4_0 = ConvBNLayer(
-            1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_4"
-        )
-        self._conv4_00 = ConvBNLayer(
-            384,
-            448,
-            (1, 3),
-            padding=(0, 1),
-            act="relu",
-            name="inception_c" + name + "_1x3_2",
-        )
-        self._conv4_000 = ConvBNLayer(
-            448,
-            512,
-            (3, 1),
-            padding=(1, 0),
-            act="relu",
-            name="inception_c" + name + "_3x1_2",
-        )
-        self._conv4_1 = ConvBNLayer(
-            512,
-            256,
-            (1, 3),
-            padding=(0, 1),
-            act="relu",
-            name="inception_c" + name + "_1x3_3",
-        )
-        self._conv4_2 = ConvBNLayer(
-            512,
-            256,
-            (3, 1),
-            padding=(1, 0),
-            act="relu",
-            name="inception_c" + name + "_3x1_3",
-        )
-
-    def forward(self, inputs):
-        pool1 = self._pool(inputs)
-        conv1 = self._conv1(pool1)
-
-        conv2 = self._conv2(inputs)
-
-        conv3 = self._conv3_0(inputs)
-        conv3_1 = self._conv3_1(conv3)
-        conv3_2 = self._conv3_2(conv3)
-
-        conv4 = self._conv4_0(inputs)
-        conv4 = self._conv4_00(conv4)
-        conv4 = self._conv4_000(conv4)
-        conv4_1 = self._conv4_1(conv4)
-        conv4_2 = self._conv4_2(conv4)
-
-        concat = paddle.concat(
-            [conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], axis=1
-        )
-
-        return concat
-
-
-class InceptionV4DY(nn.Layer):
-    def __init__(self, class_dim=1000):
-        super(InceptionV4DY, self).__init__()
-        self._inception_stem = InceptionStem()
-
-        self._inceptionA_1 = InceptionA(name="1")
-        self._inceptionA_2 = InceptionA(name="2")
-        self._inceptionA_3 = InceptionA(name="3")
-        self._inceptionA_4 = InceptionA(name="4")
-        self._reductionA = ReductionA()
-
-        self._inceptionB_1 = InceptionB(name="1")
-        self._inceptionB_2 = InceptionB(name="2")
-        self._inceptionB_3 = InceptionB(name="3")
-        self._inceptionB_4 = InceptionB(name="4")
-        self._inceptionB_5 = InceptionB(name="5")
-        self._inceptionB_6 = InceptionB(name="6")
-        self._inceptionB_7 = InceptionB(name="7")
-        self._reductionB = ReductionB()
-
-        self._inceptionC_1 = InceptionC(name="1")
-        self._inceptionC_2 = InceptionC(name="2")
-        self._inceptionC_3 = InceptionC(name="3")
-
-        self.avg_pool = AdaptiveAvgPool2D(1)
-        self._drop = Dropout(p=0.2, mode="downscale_in_infer")
-        stdv = 1.0 / math.sqrt(1536 * 1.0)
-        self.out = Linear(
-            1536,
-            class_dim,
-            weight_attr=ParamAttr(
-                initializer=Uniform(-stdv, stdv), name="final_fc_weights"
-            ),
-            bias_attr=ParamAttr(name="final_fc_offset"),
-        )
-
-    def forward(self, inputs):
-        x = self._inception_stem(inputs)
-
-        x = self._inceptionA_1(x)
-        x = self._inceptionA_2(x)
-        x = self._inceptionA_3(x)
-        x = self._inceptionA_4(x)
-        x = self._reductionA(x)
-
-        x = self._inceptionB_1(x)
-        x = self._inceptionB_2(x)
-        x = self._inceptionB_3(x)
-        x = self._inceptionB_4(x)
-        x = self._inceptionB_5(x)
-        x = self._inceptionB_6(x)
-        x = self._inceptionB_7(x)
-        x = self._reductionB(x)
-
-        x = self._inceptionC_1(x)
-        x = self._inceptionC_2(x)
-        x = self._inceptionC_3(x)
-
-        x = self.avg_pool(x)
-        x = paddle.squeeze(x, axis=[2, 3])
-        x = self._drop(x)
-        x = self.out(x)
-        return x
-
-
-def InceptionV4(**args):
-    model = InceptionV4DY(**args)
-    return model
-
-
-def test_InceptionV4():
-    load_paddle_module_and_check(
-        InceptionV4, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True
-    )
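Each of these deleted tests calls `load_paddle_module_and_check`, whose internals are not shown in this diff. For orientation, a minimal stand-in might export the Paddle module to ONNX and compare outputs numerically; this is a hypothetical sketch (the `paddle.onnx.export` and `onnxruntime` usage here is an assumption, not the helper's actual implementation):

```python
import numpy as np
import paddle
import onnxruntime as ort

def check_paddle_model(model_cls, input_size=(1, 3, 224, 224)):
    # Hypothetical equivalent of load_paddle_module_and_check:
    # export to ONNX, then compare against the eager model on a random input.
    model = model_cls()
    model.eval()
    x = np.random.rand(*input_size).astype("float32")
    spec = [paddle.static.InputSpec(shape=list(input_size), dtype="float32")]
    paddle.onnx.export(model, "/tmp/model", input_spec=spec)  # writes /tmp/model.onnx
    paddle_out = model(paddle.to_tensor(x)).numpy()
    sess = ort.InferenceSession("/tmp/model.onnx")
    onnx_out = sess.run(None, {sess.get_inputs()[0].name: x})[0]
    assert np.allclose(paddle_out, onnx_out, atol=1e-4)
```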
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import KaimingNormal -import math - -__all__ = ["MobileNetV1_x0_25", "MobileNetV1_x0_5", "MobileNetV1_x0_75", "MobileNetV1"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - act="relu", - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(initializer=KaimingNormal(), name=name + "_weights"), - bias_attr=False, - ) - - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class DepthwiseSeparable(nn.Layer): - def __init__( - self, - num_channels, - num_filters1, - num_filters2, - num_groups, - stride, - scale, - name=None, - ): - super(DepthwiseSeparable, self).__init__() - - self._depthwise_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=int(num_filters1 * scale), - filter_size=3, - stride=stride, - padding=1, - num_groups=int(num_groups * scale), - name=name + "_dw", - ) - - self._pointwise_conv = ConvBNLayer( - num_channels=int(num_filters1 * scale), - filter_size=1, - num_filters=int(num_filters2 * scale), - stride=1, - padding=0, - name=name + "_sep", - ) - - def forward(self, inputs): - y = self._depthwise_conv(inputs) - y = self._pointwise_conv(y) - return y - - -class MobileNet(nn.Layer): - def __init__(self, scale=1.0, class_dim=1000): - super(MobileNet, self).__init__() - self.scale = scale - self.block_list = [] - - self.conv1 = ConvBNLayer( - num_channels=3, - filter_size=3, - channels=3, - num_filters=int(32 * scale), - stride=2, - padding=1, - name="conv1", - ) - - conv2_1 = self.add_sublayer( - "conv2_1", - sublayer=DepthwiseSeparable( - num_channels=int(32 * scale), - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, - scale=scale, - name="conv2_1", - ), - ) - self.block_list.append(conv2_1) - - conv2_2 = self.add_sublayer( - "conv2_2", - sublayer=DepthwiseSeparable( - num_channels=int(64 * scale), - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=2, - scale=scale, - name="conv2_2", - ), - ) - self.block_list.append(conv2_2) - - conv3_1 = self.add_sublayer( - "conv3_1", - sublayer=DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, - scale=scale, - name="conv3_1", - ), - ) - self.block_list.append(conv3_1) - - conv3_2 = self.add_sublayer( - "conv3_2", - sublayer=DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=256, - num_groups=128, - stride=2, - scale=scale, - name="conv3_2", - ), - ) - self.block_list.append(conv3_2) - - conv4_1 = self.add_sublayer( - "conv4_1", - sublayer=DepthwiseSeparable( - 
num_channels=int(256 * scale), - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, - scale=scale, - name="conv4_1", - ), - ) - self.block_list.append(conv4_1) - - conv4_2 = self.add_sublayer( - "conv4_2", - sublayer=DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=2, - scale=scale, - name="conv4_2", - ), - ) - self.block_list.append(conv4_2) - - for i in range(5): - conv5 = self.add_sublayer( - "conv5_" + str(i + 1), - sublayer=DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, - scale=scale, - name="conv5_" + str(i + 1), - ), - ) - self.block_list.append(conv5) - - conv5_6 = self.add_sublayer( - "conv5_6", - sublayer=DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=2, - scale=scale, - name="conv5_6", - ), - ) - self.block_list.append(conv5_6) - - conv6 = self.add_sublayer( - "conv6", - sublayer=DepthwiseSeparable( - num_channels=int(1024 * scale), - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=1, - scale=scale, - name="conv6", - ), - ) - self.block_list.append(conv6) - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - self.out = Linear( - int(1024 * scale), - class_dim, - weight_attr=ParamAttr(initializer=KaimingNormal(), name="fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset"), - ) - - def forward(self, inputs): - y = self.conv1(inputs) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.flatten(y, start_axis=1, stop_axis=-1) - y = self.out(y) - return y - - -def MobileNetV1_x0_25(**args): - model = MobileNet(scale=0.25, **args) - return model - - -def MobileNetV1_x0_5(**args): - model = MobileNet(scale=0.5, **args) - return model - - -def MobileNetV1_x0_75(**args): - model = MobileNet(scale=0.75, **args) - return model - - -def MobileNetV1(**args): - model = MobileNet(scale=1.0, **args) - return model - - -def test_MobileNetV1(): - load_paddle_module_and_check( - MobileNetV1, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_mobilenetv2.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_mobilenetv2.py deleted file mode 100644 index c5e33e9f167c64baf98262a04cf29722f925fefe..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_mobilenetv2.py +++ /dev/null @@ -1,286 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
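The `DepthwiseSeparable` block above is the reason MobileNetV1 is cheap: it factors a standard 3x3 convolution into a per-channel 3x3 plus a 1x1 projection. The parameter arithmetic (a sketch, ignoring BN parameters) can be checked directly:

```python
# Weight count of a standard 3x3 conv vs. the depthwise-separable pair above.
def standard_conv_params(c_in, c_out, k=3):
    return c_in * c_out * k * k

def separable_params(c_in, c_out, k=3):
    return c_in * k * k + c_in * c_out  # depthwise 3x3 + pointwise 1x1

c_in, c_out = 512, 512  # e.g. the conv5_x blocks at scale=1.0
print(standard_conv_params(c_in, c_out))  # 2359296
print(separable_params(c_in, c_out))      # 266752, roughly 8.8x fewer weights
```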
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D - -import math - -__all__ = [ - "MobileNetV2_x0_25", - "MobileNetV2_x0_5", - "MobileNetV2_x0_75", - "MobileNetV2", - "MobileNetV2_x1_5", - "MobileNetV2_x2_0", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - name=None, - use_cudnn=True, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - - self._batch_norm = BatchNorm( - num_filters, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name=name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, inputs, if_act=True): - y = self._conv(inputs) - y = self._batch_norm(y) - if if_act: - y = F.relu6(y) - return y - - -class InvertedResidualUnit(nn.Layer): - def __init__( - self, - num_channels, - num_in_filter, - num_filters, - stride, - filter_size, - padding, - expansion_factor, - name, - ): - super(InvertedResidualUnit, self).__init__() - num_expfilter = int(round(num_in_filter * expansion_factor)) - self._expand_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - name=name + "_expand", - ) - - self._bottleneck_conv = ConvBNLayer( - num_channels=num_expfilter, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - use_cudnn=False, - name=name + "_dwise", - ) - - self._linear_conv = ConvBNLayer( - num_channels=num_expfilter, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - name=name + "_linear", - ) - - def forward(self, inputs, ifshortcut): - y = self._expand_conv(inputs, if_act=True) - y = self._bottleneck_conv(y, if_act=True) - y = self._linear_conv(y, if_act=False) - if ifshortcut: - y = paddle.add(inputs, y) - return y - - -class InvresiBlocks(nn.Layer): - def __init__(self, in_c, t, c, n, s, name): - super(InvresiBlocks, self).__init__() - - self._first_block = InvertedResidualUnit( - num_channels=in_c, - num_in_filter=in_c, - num_filters=c, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + "_1", - ) - - self._block_list = [] - for i in range(1, n): - block = self.add_sublayer( - name + "_" + str(i + 1), - sublayer=InvertedResidualUnit( - num_channels=c, - num_in_filter=c, - num_filters=c, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + "_" + str(i + 1), - ), - ) - self._block_list.append(block) - - def forward(self, inputs): - y = self._first_block(inputs, ifshortcut=False) - for block in self._block_list: - y = block(y, ifshortcut=True) - return y - - -class MobileNet(nn.Layer): - def __init__(self, class_dim=1000, scale=1.0, prefix_name="", **args): - super(MobileNet, self).__init__() - self.scale = scale - self.class_dim = class_dim 
- - bottleneck_params_list = [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 3, 2), - (6, 64, 4, 2), - (6, 96, 3, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), - ] - - self.conv1 = ConvBNLayer( - num_channels=3, - num_filters=int(32 * scale), - filter_size=3, - stride=2, - padding=1, - name=prefix_name + "conv1_1", - ) - - self.block_list = [] - i = 1 - in_c = int(32 * scale) - for layer_setting in bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - block = self.add_sublayer( - prefix_name + "conv" + str(i), - sublayer=InvresiBlocks( - in_c=in_c, - t=t, - c=int(c * scale), - n=n, - s=s, - name=prefix_name + "conv" + str(i), - ), - ) - self.block_list.append(block) - in_c = int(c * scale) - - self.out_c = int(1280 * scale) if scale > 1.0 else 1280 - self.conv9 = ConvBNLayer( - num_channels=in_c, - num_filters=self.out_c, - filter_size=1, - stride=1, - padding=0, - name=prefix_name + "conv9", - ) - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - self.out = Linear( - self.out_c, - class_dim, - weight_attr=ParamAttr(name=prefix_name + "fc10_weights"), - bias_attr=ParamAttr(name=prefix_name + "fc10_offset"), - ) - - def forward(self, inputs): - y = self.conv1(inputs, if_act=True) - for block in self.block_list: - y = block(y) - y = self.conv9(y, if_act=True) - y = self.pool2d_avg(y) - y = paddle.flatten(y, start_axis=1, stop_axis=-1) - y = self.out(y) - return y - - -def MobileNetV2_x0_25(**args): - model = MobileNet(scale=0.25, **args) - return model - - -def MobileNetV2_x0_5(**args): - model = MobileNet(scale=0.5, **args) - return model - - -def MobileNetV2_x0_75(**args): - model = MobileNet(scale=0.75, **args) - return model - - -def MobileNetV2(**args): - model = MobileNet(scale=1.0, **args) - return model - - -def MobileNetV2_x1_5(**args): - model = MobileNet(scale=1.5, **args) - return model - - -def MobileNetV2_x2_0(**args): - model = MobileNet(scale=2.0, **args) - return model - - -def test_MobileNetV2(): - load_paddle_module_and_check( - MobileNetV2, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) - diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_mobilenetv3.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_mobilenetv3.py deleted file mode 100644 index 809fec4fac17f1614c0f303f6c1a9910f9e76b61..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_mobilenetv3.py +++ /dev/null @@ -1,377 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
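The channel flow through one `InvertedResidualUnit` above is expand (1x1, to `c_in * t`), depthwise 3x3 with `groups` equal to the expanded width, then a linear 1x1 back down to `c_out`. In the deleted code the residual add is controlled explicitly via `ifshortcut` (False for the first block of each stage); the general MobileNetV2 rule it encodes can be sketched like this:

```python
# Shapes through one inverted residual; t is the expansion factor from
# bottleneck_params_list. The add is only valid when the input and output
# tensors match, i.e. stride == 1 and c_in == c_out.
def inverted_residual_shapes(c_in, t, c_out, stride):
    c_mid = int(round(c_in * t))  # mirrors num_expfilter above
    shortcut_ok = stride == 1 and c_in == c_out
    return c_mid, shortcut_ok

print(inverted_residual_shapes(32, 6, 32, 1))  # (192, True)
print(inverted_residual_shapes(32, 6, 64, 2))  # (192, False)
```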
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn.functional import hardswish, hardsigmoid -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.regularizer import L2Decay - -import math - -__all__ = [ - "MobileNetV3_small_x0_35", - "MobileNetV3_small_x0_5", - "MobileNetV3_small_x0_75", - "MobileNetV3_small_x1_0", - "MobileNetV3_small_x1_25", - "MobileNetV3_large_x0_35", - "MobileNetV3_large_x0_5", - "MobileNetV3_large_x0_75", - "MobileNetV3_large_x1_0", - "MobileNetV3_large_x1_25", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def make_divisible(v, divisor=8, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class MobileNetV3(nn.Layer): - def __init__(self, scale=1.0, model_name="small", dropout_prob=0.2, class_dim=1000): - super(MobileNetV3, self).__init__() - - inplanes = 16 - if model_name == "large": - self.cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, "relu", 1], - [3, 64, 24, False, "relu", 2], - [3, 72, 24, False, "relu", 1], - [5, 72, 40, True, "relu", 2], - [5, 120, 40, True, "relu", 1], - [5, 120, 40, True, "relu", 1], - [3, 240, 80, False, "hardswish", 2], - [3, 200, 80, False, "hardswish", 1], - [3, 184, 80, False, "hardswish", 1], - [3, 184, 80, False, "hardswish", 1], - [3, 480, 112, True, "hardswish", 1], - [3, 672, 112, True, "hardswish", 1], - [5, 672, 160, True, "hardswish", 2], - [5, 960, 160, True, "hardswish", 1], - [5, 960, 160, True, "hardswish", 1], - ] - self.cls_ch_squeeze = 960 - self.cls_ch_expand = 1280 - elif model_name == "small": - self.cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, "relu", 2], - [3, 72, 24, False, "relu", 2], - [3, 88, 24, False, "relu", 1], - [5, 96, 40, True, "hardswish", 2], - [5, 240, 40, True, "hardswish", 1], - [5, 240, 40, True, "hardswish", 1], - [5, 120, 48, True, "hardswish", 1], - [5, 144, 48, True, "hardswish", 1], - [5, 288, 96, True, "hardswish", 2], - [5, 576, 96, True, "hardswish", 1], - [5, 576, 96, True, "hardswish", 1], - ] - self.cls_ch_squeeze = 576 - self.cls_ch_expand = 1280 - else: - raise NotImplementedError( - "mode[{}_model] is not implemented!".format(model_name) - ) - - self.conv1 = ConvBNLayer( - in_c=3, - out_c=make_divisible(inplanes * scale), - filter_size=3, - stride=2, - padding=1, - num_groups=1, - if_act=True, - act="hardswish", - name="conv1", - ) - - self.block_list = [] - i = 0 - inplanes = make_divisible(inplanes * scale) - for (k, exp, c, se, nl, s) in self.cfg: - block = self.add_sublayer( - "conv" + str(i + 2), - ResidualUnit( - in_c=inplanes, - mid_c=make_divisible(scale * exp), - out_c=make_divisible(scale * c), - filter_size=k, - stride=s, - use_se=se, - act=nl, - name="conv" + str(i + 2), - ), - ) - self.block_list.append(block) - inplanes = make_divisible(scale * c) - i += 1 - - self.last_second_conv = ConvBNLayer( - in_c=inplanes, - out_c=make_divisible(scale * self.cls_ch_squeeze), - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - act="hardswish", - name="conv_last", - ) - - self.pool = AdaptiveAvgPool2D(1) - - self.last_conv = Conv2D( - in_channels=make_divisible(scale * self.cls_ch_squeeze), 
- out_channels=self.cls_ch_expand, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name="last_1x1_conv_weights"), - bias_attr=False, - ) - - self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") - - self.out = Linear( - self.cls_ch_expand, - class_dim, - weight_attr=ParamAttr("fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, inputs, label=None): - x = self.conv1(inputs) - - for block in self.block_list: - x = block(x) - - x = self.last_second_conv(x) - x = self.pool(x) - - x = self.last_conv(x) - x = hardswish(x) - x = self.dropout(x) - x = paddle.flatten(x, start_axis=1, stop_axis=-1) - x = self.out(x) - - return x - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_c, - out_c, - filter_size, - stride, - padding, - num_groups=1, - if_act=True, - act=None, - use_cudnn=True, - name="", - ): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = Conv2D( - in_channels=in_c, - out_channels=out_c, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - self.bn = BatchNorm( - num_channels=out_c, - act=None, - param_attr=ParamAttr(name=name + "_bn_scale", regularizer=L2Decay(0.0)), - bias_attr=ParamAttr(name=name + "_bn_offset", regularizer=L2Decay(0.0)), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - if self.if_act: - if self.act == "relu": - x = F.relu(x) - elif self.act == "hardswish": - x = hardswish(x) - else: - print("The activation function is selected incorrectly.") - exit() - return x - - -class ResidualUnit(nn.Layer): - def __init__( - self, in_c, mid_c, out_c, filter_size, stride, use_se, act=None, name="" - ): - super(ResidualUnit, self).__init__() - self.if_shortcut = stride == 1 and in_c == out_c - self.if_se = use_se - - self.expand_conv = ConvBNLayer( - in_c=in_c, - out_c=mid_c, - filter_size=1, - stride=1, - padding=0, - if_act=True, - act=act, - name=name + "_expand", - ) - self.bottleneck_conv = ConvBNLayer( - in_c=mid_c, - out_c=mid_c, - filter_size=filter_size, - stride=stride, - padding=int((filter_size - 1) // 2), - num_groups=mid_c, - if_act=True, - act=act, - name=name + "_depthwise", - ) - if self.if_se: - self.mid_se = SEModule(mid_c, name=name + "_se") - self.linear_conv = ConvBNLayer( - in_c=mid_c, - out_c=out_c, - filter_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name=name + "_linear", - ) - - def forward(self, inputs): - x = self.expand_conv(inputs) - x = self.bottleneck_conv(x) - if self.if_se: - x = self.mid_se(x) - x = self.linear_conv(x) - if self.if_shortcut: - x = paddle.add(inputs, x) - return x - - -class SEModule(nn.Layer): - def __init__(self, channel, reduction=4, name=""): - super(SEModule, self).__init__() - self.avg_pool = AdaptiveAvgPool2D(1) - self.conv1 = Conv2D( - in_channels=channel, - out_channels=channel // reduction, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name=name + "_1_weights"), - bias_attr=ParamAttr(name=name + "_1_offset"), - ) - self.conv2 = Conv2D( - in_channels=channel // reduction, - out_channels=channel, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name + "_2_weights"), - bias_attr=ParamAttr(name=name + "_2_offset"), - ) - - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = F.relu(outputs) - outputs = 
self.conv2(outputs) - outputs = hardsigmoid(outputs, slope=0.2, offset=0.5) - return paddle.multiply(x=inputs, y=outputs) - - -def MobileNetV3_small_x0_35(**args): - model = MobileNetV3(model_name="small", scale=0.35, **args) - return model - - -def MobileNetV3_small_x0_5(**args): - model = MobileNetV3(model_name="small", scale=0.5, **args) - return model - - -def MobileNetV3_small_x0_75(**args): - model = MobileNetV3(model_name="small", scale=0.75, **args) - return model - - -def MobileNetV3_small_x1_0(**args): - model = MobileNetV3(model_name="small", scale=1.0, **args) - return model - - -def MobileNetV3_small_x1_25(**args): - model = MobileNetV3(model_name="small", scale=1.25, **args) - return model - - -def MobileNetV3_large_x0_35(**args): - model = MobileNetV3(model_name="large", scale=0.35, **args) - return model - - -def MobileNetV3_large_x0_5(**args): - model = MobileNetV3(model_name="large", scale=0.5, **args) - return model - - -def MobileNetV3_large_x0_75(**args): - model = MobileNetV3(model_name="large", scale=0.75, **args) - return model - - -def MobileNetV3_large_x1_0(**args): - model = MobileNetV3(model_name="large", scale=1.0, **args) - return model - - -def MobileNetV3_large_x1_25(**args): - model = MobileNetV3(model_name="large", scale=1.25, **args) - return model - - -def test_MobileNetV3(): - load_paddle_module_and_check( - MobileNetV3, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_regnet.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_regnet.py deleted file mode 100644 index 28dd6a2df73cf4bcaf737d29a8b781dc0b0d8989..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_regnet.py +++ /dev/null @@ -1,398 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
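Every width in the MobileNetV3 definition above is filtered through `make_divisible`, which rounds to the nearest multiple of `divisor` but never below 90% of the requested value. Its behavior is easy to verify with the function copied verbatim from the deleted file:

```python
def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

for c in [16, 24, 40, 96]:
    print(c, "->", make_divisible(c * 0.35))
# 16 -> 8, 24 -> 8, 40 -> 16, 96 -> 32: widths stay multiples of 8
# even at the smallest scale, which keeps the exported conv shapes regular.
```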
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = [ - "RegNetX_200MF", - "RegNetX_4GF", - "RegNetX_32GF", - "RegNetY_200MF", - "RegNetY_4GF", - "RegNetY_32GF", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def quantize_float(f, q): - """Converts a float to closest non-zero int divisible by q.""" - return int(round(f / q) * q) - - -def adjust_ws_gs_comp(ws, bms, gs): - """Adjusts the compatibility of widths and groups.""" - ws_bot = [int(w * b) for w, b in zip(ws, bms)] - gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)] - ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)] - ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)] - return ws, gs - - -def get_stages_from_blocks(ws, rs): - """Gets ws/ds of network at each stage from per block values.""" - ts = [ - w != wp or r != rp - for w, wp, r, rp in zip(ws + [0], [0] + ws, rs + [0], [0] + rs) - ] - s_ws = [w for w, t in zip(ws, ts[:-1]) if t] - s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist() - return s_ws, s_ds - - -def generate_regnet(w_a, w_0, w_m, d, q=8): - """Generates per block ws from RegNet parameters.""" - assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0 - ws_cont = np.arange(d) * w_a + w_0 - ks = np.round(np.log(ws_cont / w_0) / np.log(w_m)) - ws = w_0 * np.power(w_m, ks) - ws = np.round(np.divide(ws, q)) * q - num_stages, max_stage = len(np.unique(ws)), ks.max() + 1 - ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist() - return ws, num_stages, max_stage, ws_cont - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - padding=0, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + ".conv2d.output.1.w_0"), - bias_attr=ParamAttr(name=name + ".conv2d.output.1.b_0"), - ) - bn_name = name + "_bn" - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + ".output.1.w_0"), - bias_attr=ParamAttr(bn_name + ".output.1.b_0"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - stride, - bm, - gw, - se_on, - se_r, - shortcut=True, - name=None, - ): - super(BottleneckBlock, self).__init__() - - # Compute the bottleneck width - w_b = int(round(num_filters * bm)) - # Compute the number of groups - num_gs = w_b // gw - self.se_on = se_on - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=w_b, - filter_size=1, - padding=0, - act="relu", - name=name + "_branch2a", - ) - self.conv1 = ConvBNLayer( - num_channels=w_b, - num_filters=w_b, - filter_size=3, - stride=stride, - padding=1, - groups=num_gs, - act="relu", - name=name + "_branch2b", - ) - if se_on: - w_se = int(round(num_channels * se_r)) - self.se_block = SELayer( - 
num_channels=w_b, - num_filters=w_b, - reduction_ratio=w_se, - name=name + "_branch2se", - ) - self.conv2 = ConvBNLayer( - num_channels=w_b, - num_filters=num_filters, - filter_size=1, - act=None, - name=name + "_branch2c", - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1", - ) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - if self.se_on: - conv1 = self.se_block(conv1) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class SELayer(nn.Layer): - def __init__(self, num_channels, num_filters, reduction_ratio, name=None): - super(SELayer, self).__init__() - - self.pool2d_gap = AdaptiveAvgPool2D(1) - - self._num_channels = num_channels - - med_ch = int(num_channels / reduction_ratio) - stdv = 1.0 / math.sqrt(num_channels * 1.0) - self.squeeze = Linear( - num_channels, - med_ch, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights" - ), - bias_attr=ParamAttr(name=name + "_sqz_offset"), - ) - - stdv = 1.0 / math.sqrt(med_ch * 1.0) - self.excitation = Linear( - med_ch, - num_filters, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_exc_weights" - ), - bias_attr=ParamAttr(name=name + "_exc_offset"), - ) - - def forward(self, input): - pool = self.pool2d_gap(input) - pool = paddle.reshape(pool, shape=[-1, self._num_channels]) - squeeze = self.squeeze(pool) - squeeze = F.relu(squeeze) - excitation = self.excitation(squeeze) - excitation = F.sigmoid(excitation) - excitation = paddle.reshape(excitation, shape=[-1, self._num_channels, 1, 1]) - out = input * excitation - return out - - -class RegNet(nn.Layer): - def __init__( - self, w_a, w_0, w_m, d, group_w, bot_mul, q=8, se_on=False, class_dim=1000 - ): - super(RegNet, self).__init__() - - # Generate RegNet ws per block - b_ws, num_s, max_s, ws_cont = generate_regnet(w_a, w_0, w_m, d, q) - # Convert to per stage format - ws, ds = get_stages_from_blocks(b_ws, b_ws) - # Generate group widths and bot muls - gws = [group_w for _ in range(num_s)] - bms = [bot_mul for _ in range(num_s)] - # Adjust the compatibility of ws and gws - ws, gws = adjust_ws_gs_comp(ws, bms, gws) - # Use the same stride for each stage - ss = [2 for _ in range(num_s)] - # Use SE for RegNetY - se_r = 0.25 - # Construct the model - # Group params by stage - stage_params = list(zip(ds, ws, ss, bms, gws)) - # Construct the stem - stem_type = "simple_stem_in" - stem_w = 32 - block_type = "res_bottleneck_block" - - self.conv = ConvBNLayer( - num_channels=3, - num_filters=stem_w, - filter_size=3, - stride=2, - padding=1, - act="relu", - name="stem_conv", - ) - - self.block_list = [] - for block, (d, w_out, stride, bm, gw) in enumerate(stage_params): - shortcut = False - for i in range(d): - num_channels = stem_w if block == i == 0 else in_channels - # Stride apply to the first block of the stage - b_stride = stride if i == 0 else 1 - conv_name = "s" + str(block + 1) + "_b" + str(i + 1) # chr(97 + i) - bottleneck_block = self.add_sublayer( - conv_name, - BottleneckBlock( - num_channels=num_channels, - num_filters=w_out, - stride=b_stride, - bm=bm, - gw=gw, - se_on=se_on, - se_r=se_r, - shortcut=shortcut, - name=conv_name, - ), - ) - in_channels = w_out - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg 
= AdaptiveAvgPool2D(1) - - self.pool2d_avg_channels = w_out - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear( - self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0"), - ) - - def forward(self, inputs): - y = self.conv(inputs) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y - - -def RegNetX_200MF(**args): - model = RegNet( - w_a=36.44, w_0=24, w_m=2.49, d=13, group_w=8, bot_mul=1.0, q=8, **args - ) - return model - - -def RegNetX_4GF(**args): - model = RegNet( - w_a=38.65, w_0=96, w_m=2.43, d=23, group_w=40, bot_mul=1.0, q=8, **args - ) - return model - - -def RegNetX_32GF(**args): - model = RegNet( - w_a=69.86, w_0=320, w_m=2.0, d=23, group_w=168, bot_mul=1.0, q=8, **args - ) - return model - - -def RegNetY_200MF(**args): - model = RegNet( - w_a=36.44, - w_0=24, - w_m=2.49, - d=13, - group_w=8, - bot_mul=1.0, - q=8, - se_on=True, - **args - ) - return model - - -def RegNetY_4GF(**args): - model = RegNet( - w_a=31.41, - w_0=96, - w_m=2.24, - d=22, - group_w=64, - bot_mul=1.0, - q=8, - se_on=True, - **args - ) - return model - - -def RegNetY_32GF(**args): - model = RegNet( - w_a=115.89, - w_0=232, - w_m=2.53, - d=20, - group_w=232, - bot_mul=1.0, - q=8, - se_on=True, - **args - ) - return model - - -def test_RegNetX_200MF(): - load_paddle_module_and_check( - RegNetX_200MF, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) - diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_repvgg.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_repvgg.py deleted file mode 100644 index 8f3b0dfb6ce4babf2cc2cca9ee0e690343131ed0..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_repvgg.py +++ /dev/null @@ -1,384 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
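The RegNet widths above are not hand-picked: `generate_regnet` quantizes a linear width ramp onto a geometric grid. The helper can be exercised standalone (a sketch using the deleted file's own math, shown here for the RegNetX_200MF parameters):

```python
import numpy as np

# Replay generate_regnet for RegNetX_200MF (w_a=36.44, w_0=24, w_m=2.49, d=13).
w_a, w_0, w_m, d, q = 36.44, 24, 2.49, 13, 8
ws_cont = np.arange(d) * w_a + w_0                 # continuous linear ramp
ks = np.round(np.log(ws_cont / w_0) / np.log(w_m)) # snap to powers of w_m
ws = np.round(w_0 * np.power(w_m, ks) / q) * q     # quantize to multiples of q
print(sorted(set(ws.astype(int).tolist())))        # the distinct stage widths
```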
-""" -import paddle.nn as nn -import paddle -import numpy as np - -__all__ = [ - "RepVGG", - "RepVGG_A0", - "RepVGG_A1", - "RepVGG_A2", - "RepVGG_B0", - "RepVGG_B1", - "RepVGG_B2", - "RepVGG_B3", - "RepVGG_B1g2", - "RepVGG_B1g4", - "RepVGG_B2g2", - "RepVGG_B2g4", - "RepVGG_B3g2", - "RepVGG_B3g4", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBN(nn.Layer): - def __init__( - self, in_channels, out_channels, kernel_size, stride, padding, groups=1 - ): - super(ConvBN, self).__init__() - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=False, - ) - self.bn = nn.BatchNorm2D(num_features=out_channels) - - def forward(self, x): - y = self.conv(x) - y = self.bn(y) - return y - - -class RepVGGBlock(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - padding_mode="zeros", - ): - super(RepVGGBlock, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.padding = padding - self.dilation = dilation - self.groups = groups - self.padding_mode = padding_mode - - assert kernel_size == 3 - assert padding == 1 - - padding_11 = padding - kernel_size // 2 - - self.nonlinearity = nn.ReLU() - - self.rbr_identity = ( - nn.BatchNorm2D(num_features=in_channels) - if out_channels == in_channels and stride == 1 - else None - ) - self.rbr_dense = ConvBN( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - ) - self.rbr_1x1 = ConvBN( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=stride, - padding=padding_11, - groups=groups, - ) - - def forward(self, inputs): - if not self.training: - return self.nonlinearity(self.rbr_reparam(inputs)) - - if self.rbr_identity is None: - id_out = 0 - else: - id_out = self.rbr_identity(inputs) - return self.nonlinearity(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out) - - def eval(self): - if not hasattr(self, "rbr_reparam"): - self.rbr_reparam = nn.Conv2D( - in_channels=self.in_channels, - out_channels=self.out_channels, - kernel_size=self.kernel_size, - stride=self.stride, - padding=self.padding, - dilation=self.dilation, - groups=self.groups, - padding_mode=self.padding_mode, - ) - self.training = False - kernel, bias = self.get_equivalent_kernel_bias() - self.rbr_reparam.weight.set_value(kernel) - self.rbr_reparam.bias.set_value(bias) - for layer in self.sublayers(): - layer.eval() - - def get_equivalent_kernel_bias(self): - kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) - kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) - kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) - return ( - kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, - bias3x3 + bias1x1 + biasid, - ) - - def _pad_1x1_to_3x3_tensor(self, kernel1x1): - if kernel1x1 is None: - return 0 - else: - return nn.functional.pad(kernel1x1, [1, 1, 1, 1]) - - def _fuse_bn_tensor(self, branch): - if branch is None: - return 0, 0 - if isinstance(branch, ConvBN): - kernel = branch.conv.weight - running_mean = branch.bn._mean - running_var = branch.bn._variance - gamma = branch.bn.weight - beta = branch.bn.bias - eps = branch.bn._epsilon - else: - assert isinstance(branch, nn.BatchNorm2D) - if not hasattr(self, "id_tensor"): - input_dim 
= self.in_channels // self.groups - kernel_value = np.zeros( - (self.in_channels, input_dim, 3, 3), dtype=np.float32 - ) - for i in range(self.in_channels): - kernel_value[i, i % input_dim, 1, 1] = 1 - self.id_tensor = paddle.to_tensor(kernel_value) - kernel = self.id_tensor - running_mean = branch._mean - running_var = branch._variance - gamma = branch.weight - beta = branch.bias - eps = branch._epsilon - std = (running_var + eps).sqrt() - t = (gamma / std).reshape((-1, 1, 1, 1)) - return kernel * t, beta - running_mean * gamma / std - - -class RepVGG(nn.Layer): - def __init__( - self, - num_blocks, - width_multiplier=None, - override_groups_map=None, - class_dim=1000, - ): - super(RepVGG, self).__init__() - - assert len(width_multiplier) == 4 - self.override_groups_map = override_groups_map or dict() - - assert 0 not in self.override_groups_map - - self.in_planes = min(64, int(64 * width_multiplier[0])) - - self.stage0 = RepVGGBlock( - in_channels=3, - out_channels=self.in_planes, - kernel_size=3, - stride=2, - padding=1, - ) - self.cur_layer_idx = 1 - self.stage1 = self._make_stage( - int(64 * width_multiplier[0]), num_blocks[0], stride=2 - ) - self.stage2 = self._make_stage( - int(128 * width_multiplier[1]), num_blocks[1], stride=2 - ) - self.stage3 = self._make_stage( - int(256 * width_multiplier[2]), num_blocks[2], stride=2 - ) - self.stage4 = self._make_stage( - int(512 * width_multiplier[3]), num_blocks[3], stride=2 - ) - self.gap = nn.AdaptiveAvgPool2D(output_size=1) - self.linear = nn.Linear(int(512 * width_multiplier[3]), class_dim) - - def _make_stage(self, planes, num_blocks, stride): - strides = [stride] + [1] * (num_blocks - 1) - blocks = [] - for stride in strides: - cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1) - blocks.append( - RepVGGBlock( - in_channels=self.in_planes, - out_channels=planes, - kernel_size=3, - stride=stride, - padding=1, - groups=cur_groups, - ) - ) - self.in_planes = planes - self.cur_layer_idx += 1 - return nn.Sequential(*blocks) - - def forward(self, x): - out = self.stage0(x) - out = self.stage1(out) - out = self.stage2(out) - out = self.stage3(out) - out = self.stage4(out) - out = self.gap(out) - out = paddle.flatten(out, start_axis=1) - out = self.linear(out) - return out - - -optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26] -g2_map = {l: 2 for l in optional_groupwise_layers} -g4_map = {l: 4 for l in optional_groupwise_layers} - - -def RepVGG_A0(**kwargs): - return RepVGG( - num_blocks=[2, 4, 14, 1], - width_multiplier=[0.75, 0.75, 0.75, 2.5], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_A1(**kwargs): - return RepVGG( - num_blocks=[2, 4, 14, 1], - width_multiplier=[1, 1, 1, 2.5], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_A2(**kwargs): - return RepVGG( - num_blocks=[2, 4, 14, 1], - width_multiplier=[1.5, 1.5, 1.5, 2.75], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_B0(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[1, 1, 1, 2.5], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_B1(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2, 2, 2, 4], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_B1g2(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2, 2, 2, 4], - override_groups_map=g2_map, - **kwargs - ) - - -def RepVGG_B1g4(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2, 2, 2, 4], - override_groups_map=g4_map, - **kwargs - ) - - 
-def RepVGG_B2(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2.5, 2.5, 2.5, 5], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_B2g2(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2.5, 2.5, 2.5, 5], - override_groups_map=g2_map, - **kwargs - ) - - -def RepVGG_B2g4(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2.5, 2.5, 2.5, 5], - override_groups_map=g4_map, - **kwargs - ) - - -def RepVGG_B3(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[3, 3, 3, 5], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_B3g2(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[3, 3, 3, 5], - override_groups_map=g2_map, - **kwargs - ) - - -def RepVGG_B3g4(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[3, 3, 3, 5], - override_groups_map=g4_map, - **kwargs - ) - - -def test_RepVGG_A0(): - load_paddle_module_and_check( - RepVGG_A0, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_res2net.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_res2net.py deleted file mode 100644 index 61e6ff080073c2c959d46012ef570e0f11a55632..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_res2net.py +++ /dev/null @@ -1,303 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform - -import math - -__all__ = [ - "Res2Net50_48w_2s", - "Res2Net50_26w_4s", - "Res2Net50_14w_8s", - "Res2Net50_48w_2s", - "Res2Net50_26w_6s", - "Res2Net50_26w_8s", - "Res2Net101_26w_4s", - "Res2Net152_26w_4s", - "Res2Net200_26w_4s", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, - num_channels1, - num_channels2, - num_filters, - stride, - scales, - shortcut=True, - if_first=False, - name=None, - ): - super(BottleneckBlock, self).__init__() - self.stride = stride - self.scales = scales - self.conv0 = ConvBNLayer( - num_channels=num_channels1, - num_filters=num_filters, - filter_size=1, - act="relu", - name=name + "_branch2a", - ) - self.conv1_list = [] - for s in range(scales - 1): - conv1 = self.add_sublayer( - name + "_branch2b_" + str(s + 1), - ConvBNLayer( - num_channels=num_filters // scales, - num_filters=num_filters // scales, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2b_" + str(s + 1), - ), - ) - self.conv1_list.append(conv1) - self.pool2d_avg = AvgPool2D(kernel_size=3, stride=stride, padding=1) - - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_channels2, - filter_size=1, - act=None, - name=name + "_branch2c", - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels1, - num_filters=num_channels2, - filter_size=1, - stride=stride, - name=name + "_branch1", - ) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - xs = paddle.split(y, self.scales, 1) - ys = [] - for s, conv1 in enumerate(self.conv1_list): - if s == 0 or self.stride == 2: - ys.append(conv1(xs[s])) - else: - ys.append(conv1(paddle.add(xs[s], ys[-1]))) - if self.stride == 1: - ys.append(xs[-1]) - else: - ys.append(self.pool2d_avg(xs[-1])) - conv1 = paddle.concat(ys, axis=1) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class Res2Net(nn.Layer): - def __init__(self, layers=50, scales=4, width=26, class_dim=1000): - super(Res2Net, self).__init__() - - self.layers = layers - self.scales = scales - self.width = width - basic_width = self.width * self.scales - 
supported_layers = [50, 101, 152, 200] - assert ( - layers in supported_layers - ), "supported layers are {} but input layer is {}".format( - supported_layers, layers - ) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, 1024] - num_channels2 = [256, 512, 1024, 2048] - num_filters = [basic_width * t for t in [1, 2, 4, 8]] - - self.conv1 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act="relu", - name="conv1", - ) - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - "bb_%d_%d" % (block, i), - BottleneckBlock( - num_channels1=num_channels[block] - if i == 0 - else num_channels2[block], - num_channels2=num_channels2[block], - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - scales=scales, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name, - ), - ) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear( - self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, inputs): - y = self.conv1(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y - - -def Res2Net50_48w_2s(**args): - model = Res2Net(layers=50, scales=2, width=48, **args) - return model - - -def Res2Net50_26w_4s(**args): - model = Res2Net(layers=50, scales=4, width=26, **args) - return model - - -def Res2Net50_14w_8s(**args): - model = Res2Net(layers=50, scales=8, width=14, **args) - return model - - -def Res2Net50_26w_6s(**args): - model = Res2Net(layers=50, scales=6, width=26, **args) - return model - - -def Res2Net50_26w_8s(**args): - model = Res2Net(layers=50, scales=8, width=26, **args) - return model - - -def Res2Net101_26w_4s(**args): - model = Res2Net(layers=101, scales=4, width=26, **args) - return model - - -def Res2Net152_26w_4s(**args): - model = Res2Net(layers=152, scales=4, width=26, **args) - return model - - -def Res2Net200_26w_4s(**args): - model = Res2Net(layers=200, scales=4, width=26, **args) - return model - - -def test_Res2Net50_48w_2s(): - load_paddle_module_and_check( - Res2Net50_48w_2s, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_resnet.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_resnet.py deleted file mode 100644 index a67c9d133b44f5033b1817b9befdf7f178c12320..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_resnet.py +++ /dev/null @@ -1,348 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform - -import math - -__all__ = ["ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - data_format="NCHW", - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - data_format=data_format, - ) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - data_layout=data_format, - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - stride, - shortcut=True, - name=None, - data_format="NCHW", - ): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", - name=name + "_branch2a", - data_format=data_format, - ) - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2b", - data_format=data_format, - ) - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c", - data_format=data_format, - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride, - name=name + "_branch1", - data_format=data_format, - ) - - self.shortcut = shortcut - - self._num_channels_out = num_filters * 4 - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - stride, - shortcut=True, - name=None, - data_format="NCHW", - ): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - 
num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2a", - data_format=data_format, - ) - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b", - data_format=data_format, - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1", - data_format=data_format, - ) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet(nn.Layer): - def __init__( - self, layers=50, class_dim=1000, input_image_channel=3, data_format="NCHW" - ): - super(ResNet, self).__init__() - - self.layers = layers - self.data_format = data_format - self.input_image_channel = input_image_channel - - supported_layers = [18, 34, 50, 101, 152] - assert ( - layers in supported_layers - ), "supported layers are {} but input layer is {}".format( - supported_layers, layers - ) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.conv = ConvBNLayer( - num_channels=self.input_image_channel, - num_filters=64, - filter_size=7, - stride=2, - act="relu", - name="conv1", - data_format=self.data_format, - ) - self.pool2d_max = MaxPool2D( - kernel_size=3, stride=2, padding=1, data_format=self.data_format - ) - - self.block_list = [] - if layers >= 50: - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - conv_name, - BottleneckBlock( - num_channels=num_channels[block] - if i == 0 - else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - name=conv_name, - data_format=self.data_format, - ), - ) - self.block_list.append(bottleneck_block) - shortcut = True - else: - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - conv_name, - BasicBlock( - num_channels=num_channels[block] - if i == 0 - else num_filters[block], - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - name=conv_name, - data_format=self.data_format, - ), - ) - self.block_list.append(basic_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2D(1, data_format=self.data_format) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear( - self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0"), - ) - - def forward(self, inputs): - with paddle.static.amp.fp16_guard(): - if self.data_format == "NHWC": - inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1]) - 
inputs.stop_gradient = True - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y - - -def ResNet18(**args): - model = ResNet(layers=18, **args) - return model - - -def ResNet34(**args): - model = ResNet(layers=34, **args) - return model - - -def ResNet50(**args): - model = ResNet(layers=50, **args) - return model - - -def ResNet101(**args): - model = ResNet(layers=101, **args) - return model - - -def ResNet152(**args): - model = ResNet(layers=152, **args) - return model - - -def test_ResNet18(): - load_paddle_module_and_check( - ResNet18, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_resnext.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_resnext.py deleted file mode 100644 index ee4f8b18da8cfe25514df790a04a776406983baf..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_resnext.py +++ /dev/null @@ -1,264 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform - -import math - -__all__ = [ - "ResNeXt50_32x4d", - "ResNeXt50_64x4d", - "ResNeXt101_32x4d", - "ResNeXt101_64x4d", - "ResNeXt152_32x4d", - "ResNeXt152_64x4d", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, num_channels, num_filters, stride, cardinality, shortcut=True, name=None - ): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", 
- name=name + "_branch2a", - ) - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act="relu", - name=name + "_branch2b", - ) - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c", - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1", - ) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class ResNeXt(nn.Layer): - def __init__(self, layers=50, class_dim=1000, cardinality=32): - super(ResNeXt, self).__init__() - - self.layers = layers - self.cardinality = cardinality - supported_layers = [50, 101, 152] - assert ( - layers in supported_layers - ), "supported layers are {} but input layer is {}".format( - supported_layers, layers - ) - supported_cardinality = [32, 64] - assert ( - cardinality in supported_cardinality - ), "supported cardinality is {} but input cardinality is {}".format( - supported_cardinality, cardinality - ) - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] - num_filters = ( - [128, 256, 512, 1024] if cardinality == 32 else [256, 512, 1024, 2048] - ) - - self.conv = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act="relu", - name="res_conv1", - ) - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - "bb_%d_%d" % (block, i), - BottleneckBlock( - num_channels=num_channels[block] - if i == 0 - else num_filters[block] * int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - name=conv_name, - ), - ) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear( - self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, inputs): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y - - -def ResNeXt50_32x4d(**args): - model = ResNeXt(layers=50, cardinality=32, **args) - return model - - -def ResNeXt50_64x4d(**args): - model = ResNeXt(layers=50, cardinality=64, **args) - return model - - -def ResNeXt101_32x4d(**args): - model = ResNeXt(layers=101, cardinality=32, **args) - return model - - -def 
ResNeXt101_64x4d(**args): - model = ResNeXt(layers=101, cardinality=64, **args) - return model - - -def ResNeXt152_32x4d(**args): - model = ResNeXt(layers=152, cardinality=32, **args) - return model - - -def ResNeXt152_64x4d(**args): - model = ResNeXt(layers=152, cardinality=64, **args) - return model - - -def test_ResNeXt50_32x4d(): - load_paddle_module_and_check( - ResNeXt50_32x4d, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_se_resnext.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_se_resnext.py deleted file mode 100644 index 77536dfa420ab4830185eaa09abae73b79790f23..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_se_resnext.py +++ /dev/null @@ -1,325 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform - -import math - -__all__ = ["SE_ResNeXt50_32x4d", "SE_ResNeXt101_32x4d", "SE_ResNeXt152_64x4d"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - bn_name = name + "_bn" - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - stride, - cardinality, - reduction_ratio, - shortcut=True, - if_first=False, - name=None, - ): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", - name="conv" + name + "_x1", - ) - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act="relu", - name="conv" + name + "_x2", - ) - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - 
name="conv" + name + "_x3", - ) - self.scale = SELayer( - num_channels=num_filters * 2 if cardinality == 32 else num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - reduction_ratio=reduction_ratio, - name="fc" + name, - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=stride, - name="conv" + name + "_prj", - ) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - scale = self.scale(conv2) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=scale) - y = F.relu(y) - return y - - -class SELayer(nn.Layer): - def __init__(self, num_channels, num_filters, reduction_ratio, name=None): - super(SELayer, self).__init__() - - self.pool2d_gap = AdaptiveAvgPool2D(1) - - self._num_channels = num_channels - - med_ch = int(num_channels / reduction_ratio) - stdv = 1.0 / math.sqrt(num_channels * 1.0) - self.squeeze = Linear( - num_channels, - med_ch, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights" - ), - bias_attr=ParamAttr(name=name + "_sqz_offset"), - ) - self.relu = nn.ReLU() - stdv = 1.0 / math.sqrt(med_ch * 1.0) - self.excitation = Linear( - med_ch, - num_filters, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_exc_weights" - ), - bias_attr=ParamAttr(name=name + "_exc_offset"), - ) - self.sigmoid = nn.Sigmoid() - - def forward(self, input): - pool = self.pool2d_gap(input) - pool = paddle.squeeze(pool, axis=[2, 3]) - squeeze = self.squeeze(pool) - squeeze = self.relu(squeeze) - excitation = self.excitation(squeeze) - excitation = self.sigmoid(excitation) - excitation = paddle.unsqueeze(excitation, axis=[2, 3]) - out = input * excitation - return out - - -class ResNeXt(nn.Layer): - def __init__(self, layers=50, class_dim=1000, cardinality=32): - super(ResNeXt, self).__init__() - - self.layers = layers - self.cardinality = cardinality - self.reduction_ratio = 16 - supported_layers = [50, 101, 152] - assert ( - layers in supported_layers - ), "supported layers are {} but input layer is {}".format( - supported_layers, layers - ) - supported_cardinality = [32, 64] - assert ( - cardinality in supported_cardinality - ), "supported cardinality is {} but input cardinality is {}".format( - supported_cardinality, cardinality - ) - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] - num_filters = ( - [128, 256, 512, 1024] if cardinality == 32 else [256, 512, 1024, 2048] - ) - if layers < 152: - self.conv = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act="relu", - name="conv1", - ) - else: - self.conv1_1 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=3, - stride=2, - act="relu", - name="conv1", - ) - self.conv1_2 = ConvBNLayer( - num_channels=64, - num_filters=64, - filter_size=3, - stride=1, - act="relu", - name="conv2", - ) - self.conv1_3 = ConvBNLayer( - num_channels=64, - num_filters=128, - filter_size=3, - stride=1, - act="relu", - name="conv3", - ) - - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - n = 1 if layers == 50 or layers == 101 else 3 - for block in range(len(depth)): - n += 1 - shortcut = False - for i in range(depth[block]): - 
bottleneck_block = self.add_sublayer( - "bb_%d_%d" % (block, i), - BottleneckBlock( - num_channels=num_channels[block] - if i == 0 - else num_filters[block] * int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - reduction_ratio=self.reduction_ratio, - shortcut=shortcut, - if_first=block == 0, - name=str(n) + "_" + str(i + 1), - ), - ) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear( - self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc6_weights"), - bias_attr=ParamAttr(name="fc6_offset"), - ) - - def forward(self, inputs): - if self.layers < 152: - y = self.conv(inputs) - else: - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y - - -def SE_ResNeXt50_32x4d(**args): - model = ResNeXt(layers=50, cardinality=32, **args) - return model - - -def SE_ResNeXt101_32x4d(**args): - model = ResNeXt(layers=101, cardinality=32, **args) - return model - - -def SE_ResNeXt152_64x4d(**args): - model = ResNeXt(layers=152, cardinality=64, **args) - return model - - -def test_SE_ResNeXt50_32x4d(): - load_paddle_module_and_check( - SE_ResNeXt50_32x4d, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_shufflenet_v2.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_shufflenet_v2.py deleted file mode 100644 index 803a147e72239bf1931630ac6493e42b2e0d0a88..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_shufflenet_v2.py +++ /dev/null @@ -1,339 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr, reshape, transpose, concat, split -from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear -from paddle.nn.initializer import KaimingNormal -from paddle.nn.functional import swish - -__all__ = [ - "ShuffleNetV2_x0_25", - "ShuffleNetV2_x0_33", - "ShuffleNetV2_x0_5", - "ShuffleNetV2_x1_0", - "ShuffleNetV2_x1_5", - "ShuffleNetV2_x2_0", - "ShuffleNetV2_swish", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def channel_shuffle(x, groups): - batch_size, num_channels, height, width = x.shape[0:4] - channels_per_group = num_channels // groups - - # reshape - x = reshape(x=x, shape=[batch_size, groups, channels_per_group, height, width]) - - # transpose - x = transpose(x=x, perm=[0, 2, 1, 3, 4]) - - # flatten - x = reshape(x=x, shape=[batch_size, num_channels, height, width]) - return x - - -class ConvBNLayer(Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(initializer=KaimingNormal(), name=name + "_weights"), - bias_attr=False, - ) - - self._batch_norm = BatchNorm( - out_channels, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name=name + "_bn_offset"), - act=act, - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class InvertedResidual(Layer): - def __init__(self, in_channels, out_channels, stride, act="relu", name=None): - super(InvertedResidual, self).__init__() - self._conv_pw = ConvBNLayer( - in_channels=in_channels // 2, - out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - act=act, - name="stage_" + name + "_conv1", - ) - self._conv_dw = ConvBNLayer( - in_channels=out_channels // 2, - out_channels=out_channels // 2, - kernel_size=3, - stride=stride, - padding=1, - groups=out_channels // 2, - act=None, - name="stage_" + name + "_conv2", - ) - self._conv_linear = ConvBNLayer( - in_channels=out_channels // 2, - out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - act=act, - name="stage_" + name + "_conv3", - ) - - def forward(self, inputs): - x1, x2 = split( - inputs, num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2], axis=1 - ) - x2 = self._conv_pw(x2) - x2 = self._conv_dw(x2) - x2 = self._conv_linear(x2) - out = concat([x1, x2], axis=1) - return channel_shuffle(out, 2) - - -class InvertedResidualDS(Layer): - def __init__(self, in_channels, out_channels, stride, act="relu", name=None): - super(InvertedResidualDS, self).__init__() - - # branch1 - self._conv_dw_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=in_channels, - kernel_size=3, - stride=stride, - padding=1, - groups=in_channels, - act=None, - name="stage_" + name + "_conv4", - ) - self._conv_linear_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - act=act, - name="stage_" + name + "_conv5", - ) - # branch2 - self._conv_pw_2 = ConvBNLayer( - in_channels=in_channels, - 
out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - act=act, - name="stage_" + name + "_conv1", - ) - self._conv_dw_2 = ConvBNLayer( - in_channels=out_channels // 2, - out_channels=out_channels // 2, - kernel_size=3, - stride=stride, - padding=1, - groups=out_channels // 2, - act=None, - name="stage_" + name + "_conv2", - ) - self._conv_linear_2 = ConvBNLayer( - in_channels=out_channels // 2, - out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - act=act, - name="stage_" + name + "_conv3", - ) - - def forward(self, inputs): - x1 = self._conv_dw_1(inputs) - x1 = self._conv_linear_1(x1) - x2 = self._conv_pw_2(inputs) - x2 = self._conv_dw_2(x2) - x2 = self._conv_linear_2(x2) - out = concat([x1, x2], axis=1) - - return channel_shuffle(out, 2) - - -class ShuffleNet(Layer): - def __init__(self, class_dim=1000, scale=1.0, act="relu"): - super(ShuffleNet, self).__init__() - self.scale = scale - self.class_dim = class_dim - stage_repeats = [4, 8, 4] - - if scale == 0.25: - stage_out_channels = [-1, 24, 24, 48, 96, 512] - elif scale == 0.33: - stage_out_channels = [-1, 24, 32, 64, 128, 512] - elif scale == 0.5: - stage_out_channels = [-1, 24, 48, 96, 192, 1024] - elif scale == 1.0: - stage_out_channels = [-1, 24, 116, 232, 464, 1024] - elif scale == 1.5: - stage_out_channels = [-1, 24, 176, 352, 704, 1024] - elif scale == 2.0: - stage_out_channels = [-1, 24, 224, 488, 976, 2048] - else: - raise NotImplementedError( - "This scale size:[" + str(scale) + "] is not implemented!" - ) - # 1. conv1 - self._conv1 = ConvBNLayer( - in_channels=3, - out_channels=stage_out_channels[1], - kernel_size=3, - stride=2, - padding=1, - act=act, - name="stage1_conv", - ) - self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1) - - # 2. bottleneck sequences - self._block_list = [] - for stage_id, num_repeat in enumerate(stage_repeats): - for i in range(num_repeat): - if i == 0: - block = self.add_sublayer( - name=str(stage_id + 2) + "_" + str(i + 1), - sublayer=InvertedResidualDS( - in_channels=stage_out_channels[stage_id + 1], - out_channels=stage_out_channels[stage_id + 2], - stride=2, - act=act, - name=str(stage_id + 2) + "_" + str(i + 1), - ), - ) - else: - block = self.add_sublayer( - name=str(stage_id + 2) + "_" + str(i + 1), - sublayer=InvertedResidual( - in_channels=stage_out_channels[stage_id + 2], - out_channels=stage_out_channels[stage_id + 2], - stride=1, - act=act, - name=str(stage_id + 2) + "_" + str(i + 1), - ), - ) - self._block_list.append(block) - # 3. last_conv - self._last_conv = ConvBNLayer( - in_channels=stage_out_channels[-2], - out_channels=stage_out_channels[-1], - kernel_size=1, - stride=1, - padding=0, - act=act, - name="conv5", - ) - # 4. pool - self._pool2d_avg = AdaptiveAvgPool2D(1) - self._out_c = stage_out_channels[-1] - # 5. 
fc - self._fc = Linear( - stage_out_channels[-1], - class_dim, - weight_attr=ParamAttr(name="fc6_weights"), - bias_attr=ParamAttr(name="fc6_offset"), - ) - - def forward(self, inputs): - y = self._conv1(inputs) - y = self._max_pool(y) - for inv in self._block_list: - y = inv(y) - y = self._last_conv(y) - y = self._pool2d_avg(y) - y = paddle.flatten(y, start_axis=1, stop_axis=-1) - y = self._fc(y) - return y - - -def ShuffleNetV2_x0_25(**args): - model = ShuffleNet(scale=0.25, **args) - return model - - -def ShuffleNetV2_x0_33(**args): - model = ShuffleNet(scale=0.33, **args) - return model - - -def ShuffleNetV2_x0_5(**args): - model = ShuffleNet(scale=0.5, **args) - return model - - -def ShuffleNetV2_x1_0(**args): - model = ShuffleNet(scale=1.0, **args) - return model - - -def ShuffleNetV2_x1_5(**args): - model = ShuffleNet(scale=1.5, **args) - return model - - -def ShuffleNetV2_x2_0(**args): - model = ShuffleNet(scale=2.0, **args) - return model - - -def ShuffleNetV2_swish(**args): - model = ShuffleNet(scale=1.0, act="swish", **args) - return model - - -def test_ShuffleNetV2_x0_25(): - load_paddle_module_and_check( - ShuffleNetV2_x0_25, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_squeezenet.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_squeezenet.py deleted file mode 100644 index c969ccb18078d4e568713ac5b45ad767b2b1fff1..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_squeezenet.py +++ /dev/null @@ -1,179 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D - -__all__ = ["SqueezeNet1_0", "SqueezeNet1_1"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class MakeFireConv(nn.Layer): - def __init__( - self, input_channels, output_channels, filter_size, padding=0, name=None - ): - super(MakeFireConv, self).__init__() - self._conv = Conv2D( - input_channels, - output_channels, - filter_size, - padding=padding, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=ParamAttr(name=name + "_offset"), - ) - - def forward(self, x): - x = self._conv(x) - x = F.relu(x) - return x - - -class MakeFire(nn.Layer): - def __init__( - self, - input_channels, - squeeze_channels, - expand1x1_channels, - expand3x3_channels, - name=None, - ): - super(MakeFire, self).__init__() - self._conv = MakeFireConv( - input_channels, squeeze_channels, 1, name=name + "_squeeze1x1" - ) - self._conv_path1 = MakeFireConv( - squeeze_channels, expand1x1_channels, 1, name=name + "_expand1x1" - ) - self._conv_path2 = MakeFireConv( - squeeze_channels, expand3x3_channels, 3, padding=1, name=name + "_expand3x3" - ) - - def forward(self, inputs): - x = self._conv(inputs) - x1 = self._conv_path1(x) - x2 = self._conv_path2(x) - return paddle.concat([x1, x2], axis=1) - - -class SqueezeNet(nn.Layer): - def __init__(self, version, class_dim=1000): - super(SqueezeNet, self).__init__() - self.version = version - - if self.version == "1.0": - self._conv = Conv2D( - 3, - 96, - 7, - stride=2, - weight_attr=ParamAttr(name="conv1_weights"), - bias_attr=ParamAttr(name="conv1_offset"), - ) - self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) - self._conv1 = MakeFire(96, 16, 64, 64, name="fire2") - self._conv2 = MakeFire(128, 16, 64, 64, name="fire3") - self._conv3 = MakeFire(128, 32, 128, 128, name="fire4") - - self._conv4 = MakeFire(256, 32, 128, 128, name="fire5") - self._conv5 = MakeFire(256, 48, 192, 192, name="fire6") - self._conv6 = MakeFire(384, 48, 192, 192, name="fire7") - self._conv7 = MakeFire(384, 64, 256, 256, name="fire8") - - self._conv8 = MakeFire(512, 64, 256, 256, name="fire9") - else: - self._conv = Conv2D( - 3, - 64, - 3, - stride=2, - padding=1, - weight_attr=ParamAttr(name="conv1_weights"), - bias_attr=ParamAttr(name="conv1_offset"), - ) - self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) - self._conv1 = MakeFire(64, 16, 64, 64, name="fire2") - self._conv2 = MakeFire(128, 16, 64, 64, name="fire3") - - self._conv3 = MakeFire(128, 32, 128, 128, name="fire4") - self._conv4 = MakeFire(256, 32, 128, 128, name="fire5") - - self._conv5 = MakeFire(256, 48, 192, 192, name="fire6") - self._conv6 = MakeFire(384, 48, 192, 192, name="fire7") - self._conv7 = MakeFire(384, 64, 256, 256, name="fire8") - self._conv8 = MakeFire(512, 64, 256, 256, name="fire9") - - self._drop = Dropout(p=0.5, mode="downscale_in_infer") - self._conv9 = Conv2D( - 512, - class_dim, - 1, - weight_attr=ParamAttr(name="conv10_weights"), - bias_attr=ParamAttr(name="conv10_offset"), - ) - self._avg_pool = AdaptiveAvgPool2D(1) - - def forward(self, inputs): - x = self._conv(inputs) - x = F.relu(x) - x = self._pool(x) - if self.version == "1.0": - x = self._conv1(x) - x = self._conv2(x) - x = self._conv3(x) - x = self._pool(x) - x = self._conv4(x) - x = self._conv5(x) - x = self._conv6(x) - x = self._conv7(x) - x = self._pool(x) - x = self._conv8(x) 
- else: - x = self._conv1(x) - x = self._conv2(x) - x = self._pool(x) - x = self._conv3(x) - x = self._conv4(x) - x = self._pool(x) - x = self._conv5(x) - x = self._conv6(x) - x = self._conv7(x) - x = self._conv8(x) - x = self._drop(x) - x = self._conv9(x) - x = F.relu(x) - x = self._avg_pool(x) - x = paddle.squeeze(x, axis=[2, 3]) - return x - - -def SqueezeNet1_0(**args): - model = SqueezeNet(version="1.0", **args) - return model - - -def SqueezeNet1_1(**args): - model = SqueezeNet(version="1.1", **args) - return model - - -def test_SqueezeNet1_0(): - load_paddle_module_and_check( - SqueezeNet1_0, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_vggnet.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_vggnet.py deleted file mode 100644 index 99c79e2a4025b3a6ca937a7b18e537ee6a9d61bc..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_vggnet.py +++ /dev/null @@ -1,189 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D - -__all__ = ["VGG11", "VGG13", "VGG16", "VGG19"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBlock(nn.Layer): - def __init__(self, input_channels, output_channels, groups, name=None): - super(ConvBlock, self).__init__() - - self.groups = groups - self._conv_1 = Conv2D( - in_channels=input_channels, - out_channels=output_channels, - kernel_size=3, - stride=1, - padding=1, - weight_attr=ParamAttr(name=name + "1_weights"), - bias_attr=False, - ) - if groups == 2 or groups == 3 or groups == 4: - self._conv_2 = Conv2D( - in_channels=output_channels, - out_channels=output_channels, - kernel_size=3, - stride=1, - padding=1, - weight_attr=ParamAttr(name=name + "2_weights"), - bias_attr=False, - ) - if groups == 3 or groups == 4: - self._conv_3 = Conv2D( - in_channels=output_channels, - out_channels=output_channels, - kernel_size=3, - stride=1, - padding=1, - weight_attr=ParamAttr(name=name + "3_weights"), - bias_attr=False, - ) - if groups == 4: - self._conv_4 = Conv2D( - in_channels=output_channels, - out_channels=output_channels, - kernel_size=3, - stride=1, - padding=1, - weight_attr=ParamAttr(name=name + "4_weights"), - bias_attr=False, - ) - - self._pool = MaxPool2D(kernel_size=2, stride=2, padding=0) - - def forward(self, inputs): - x = self._conv_1(inputs) - x = F.relu(x) - if self.groups == 2 or self.groups == 3 or self.groups == 4: - x = self._conv_2(x) - x = F.relu(x) - if self.groups == 3 or self.groups == 4: - x = self._conv_3(x) - x = F.relu(x) - if self.groups == 4: - x = self._conv_4(x) - x = F.relu(x) - x = self._pool(x) - return x - - -class VGGNet(nn.Layer): - def __init__(self, layers=11, 
stop_grad_layers=0, class_dim=1000): - super(VGGNet, self).__init__() - - self.layers = layers - self.stop_grad_layers = stop_grad_layers - self.vgg_configure = { - 11: [1, 1, 2, 2, 2], - 13: [2, 2, 2, 2, 2], - 16: [2, 2, 3, 3, 3], - 19: [2, 2, 4, 4, 4], - } - assert ( - self.layers in self.vgg_configure.keys() - ), "supported layers are {} but input layer is {}".format( - self.vgg_configure.keys(), layers - ) - self.groups = self.vgg_configure[self.layers] - - self._conv_block_1 = ConvBlock(3, 64, self.groups[0], name="conv1_") - self._conv_block_2 = ConvBlock(64, 128, self.groups[1], name="conv2_") - self._conv_block_3 = ConvBlock(128, 256, self.groups[2], name="conv3_") - self._conv_block_4 = ConvBlock(256, 512, self.groups[3], name="conv4_") - self._conv_block_5 = ConvBlock(512, 512, self.groups[4], name="conv5_") - - for idx, block in enumerate( - [ - self._conv_block_1, - self._conv_block_2, - self._conv_block_3, - self._conv_block_4, - self._conv_block_5, - ] - ): - if self.stop_grad_layers >= idx + 1: - for param in block.parameters(): - param.trainable = False - - self._drop = Dropout(p=0.5, mode="downscale_in_infer") - self._fc1 = Linear( - 7 * 7 * 512, - 4096, - weight_attr=ParamAttr(name="fc6_weights"), - bias_attr=ParamAttr(name="fc6_offset"), - ) - self._fc2 = Linear( - 4096, - 4096, - weight_attr=ParamAttr(name="fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset"), - ) - self._out = Linear( - 4096, - class_dim, - weight_attr=ParamAttr(name="fc8_weights"), - bias_attr=ParamAttr(name="fc8_offset"), - ) - - def forward(self, inputs): - x = self._conv_block_1(inputs) - x = self._conv_block_2(x) - x = self._conv_block_3(x) - x = self._conv_block_4(x) - x = self._conv_block_5(x) - x = paddle.flatten(x, start_axis=1, stop_axis=-1) - x = self._fc1(x) - x = F.relu(x) - x = self._drop(x) - x = self._fc2(x) - x = F.relu(x) - x = self._drop(x) - x = self._out(x) - return x - - -def VGG11(**args): - model = VGGNet(layers=11, **args) - return model - - -def VGG13(**args): - model = VGGNet(layers=13, **args) - return model - - -def VGG16(**args): - model = VGGNet(layers=16, **args) - return model - - -def VGG19(**args): - model = VGGNet(layers=19, **args) - return model - - -def test_VGG16(): - load_paddle_module_and_check( - VGG11, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_vision_transformer.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_vision_transformer.py deleted file mode 100644 index 0d06e25085af6a6012bdcf452caa4912c7d722b2..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_vision_transformer.py +++ /dev/null @@ -1,444 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import numpy as np -import paddle -import paddle.nn as nn -from paddle.nn.initializer import TruncatedNormal, Constant - -__all__ = [ - "VisionTransformer", - "ViT_small_patch16_224", - "ViT_base_patch16_224", - "ViT_base_patch16_384", - "ViT_base_patch32_384", - "ViT_large_patch16_224", - "ViT_large_patch16_384", - "ViT_large_patch32_384", - "ViT_huge_patch16_224", - "ViT_huge_patch32_384", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - -trunc_normal_ = TruncatedNormal(std=0.02) -zeros_ = Constant(value=0.0) -ones_ = Constant(value=1.0) - - -def to_2tuple(x): - return tuple([x] * 2) - - -def drop_path(x, drop_prob=0.0, training=False): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... - See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... - """ - if drop_prob == 0.0 or not training: - return x - keep_prob = paddle.to_tensor(1 - drop_prob) - shape = (paddle.shape(x)[0],) + (1,) * (x.ndim - 1) - random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) - random_tensor = paddle.floor(random_tensor) # binarize - output = x.divide(keep_prob) * random_tensor - return output - - -class DropPath(nn.Layer): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - """ - - def __init__(self, drop_prob=None): - super(DropPath, self).__init__() - self.drop_prob = drop_prob - - def forward(self, x): - return drop_path(x, self.drop_prob, self.training) - - -class Identity(nn.Layer): - def __init__(self): - super(Identity, self).__init__() - - def forward(self, input): - return input - - -class Mlp(nn.Layer): - def __init__( - self, - in_features, - hidden_features=None, - out_features=None, - act_layer=nn.GELU, - drop=0.0, - ): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -class Attention(nn.Layer): - def __init__( - self, - dim, - num_heads=8, - qkv_bias=False, - qk_scale=None, - attn_drop=0.0, - proj_drop=0.0, - ): - super().__init__() - self.num_heads = num_heads - head_dim = dim // num_heads - self.scale = qk_scale or head_dim ** -0.5 - - self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - - def forward(self, x): - # B= paddle.shape(x)[0] - N, C = x.shape[1:] - qkv = ( - self.qkv(x) - .reshape((-1, N, 3, self.num_heads, C // self.num_heads)) - .transpose((2, 0, 3, 1, 4)) - ) - q, k, v = qkv[0], qkv[1], qkv[2] - - attn = (q.matmul(k.transpose((0, 1, 3, 2)))) * self.scale - attn = nn.functional.softmax(attn, axis=-1) - attn = self.attn_drop(attn) - - x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((-1, N, C)) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class Block(nn.Layer): - def __init__( - self, - dim, - num_heads, - mlp_ratio=4.0, - qkv_bias=False, - qk_scale=None, - drop=0.0, - attn_drop=0.0, - drop_path=0.0, - act_layer=nn.GELU, - norm_layer="nn.LayerNorm", - epsilon=1e-5, - ): - super().__init__() - self.norm1 = eval(norm_layer)(dim, 
epsilon=epsilon) - self.attn = Attention( - dim, - num_heads=num_heads, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=drop, - ) - # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here - self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity() - self.norm2 = eval(norm_layer)(dim, epsilon=epsilon) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp( - in_features=dim, - hidden_features=mlp_hidden_dim, - act_layer=act_layer, - drop=drop, - ) - - def forward(self, x): - x = x + self.drop_path(self.attn(self.norm1(x))) - x = x + self.drop_path(self.mlp(self.norm2(x))) - return x - - -class PatchEmbed(nn.Layer): - """ Image to Patch Embedding - """ - - def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): - super().__init__() - img_size = to_2tuple(img_size) - patch_size = to_2tuple(patch_size) - num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) - self.img_size = img_size - self.patch_size = patch_size - self.num_patches = num_patches - - self.proj = nn.Conv2D( - in_chans, embed_dim, kernel_size=patch_size, stride=patch_size - ) - - def forward(self, x): - B, C, H, W = x.shape - assert ( - H == self.img_size[0] and W == self.img_size[1] - ), "Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." - - x = self.proj(x).flatten(2).transpose((0, 2, 1)) - return x - - -class VisionTransformer(nn.Layer): - """ Vision Transformer with support for patch input - """ - - def __init__( - self, - img_size=224, - patch_size=16, - in_chans=3, - class_dim=1000, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - qkv_bias=False, - qk_scale=None, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.0, - norm_layer="nn.LayerNorm", - epsilon=1e-5, - **args - ): - super().__init__() - self.class_dim = class_dim - - self.num_features = self.embed_dim = embed_dim - - self.patch_embed = PatchEmbed( - img_size=img_size, - patch_size=patch_size, - in_chans=in_chans, - embed_dim=embed_dim, - ) - num_patches = self.patch_embed.num_patches - - self.pos_embed = self.create_parameter( - shape=(1, num_patches + 1, embed_dim), default_initializer=zeros_ - ) - self.add_parameter("pos_embed", self.pos_embed) - self.cls_token = self.create_parameter( - shape=(1, 1, embed_dim), default_initializer=zeros_ - ) - self.add_parameter("cls_token", self.cls_token) - self.pos_drop = nn.Dropout(p=drop_rate) - - dpr = np.linspace(0, drop_path_rate, depth) - - self.blocks = nn.LayerList( - [ - Block( - dim=embed_dim, - num_heads=num_heads, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - attn_drop=attn_drop_rate, - drop_path=dpr[i], - norm_layer=norm_layer, - epsilon=epsilon, - ) - for i in range(depth) - ] - ) - - self.norm = eval(norm_layer)(embed_dim, epsilon=epsilon) - - # Classifier head - self.head = nn.Linear(embed_dim, class_dim) if class_dim > 0 else Identity() - - # TODO(littletomatodonkey): same init in static mode - if paddle.in_dynamic_mode(): - trunc_normal_(self.pos_embed) - trunc_normal_(self.cls_token) - self.apply(self._init_weights) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight) - if isinstance(m, nn.Linear) and m.bias is not None: - zeros_(m.bias) - elif isinstance(m, nn.LayerNorm): - zeros_(m.bias) - ones_(m.weight) - - def forward_features(self, x): - # B = x.shape[0] - B = paddle.shape(x)[0] - x = self.patch_embed(x) - cls_tokens = self.cls_token.expand((B, -1, 
-1)) - x = paddle.concat((cls_tokens, x), axis=1) - x = x + self.pos_embed - x = self.pos_drop(x) - for blk in self.blocks: - x = blk(x) - x = self.norm(x) - return x[:, 0] - - def forward(self, x): - x = self.forward_features(x) - x = self.head(x) - return x - - -def ViT_small_patch16_224(**kwargs): - model = VisionTransformer( - patch_size=16, - embed_dim=768, - depth=8, - num_heads=8, - mlp_ratio=3, - qk_scale=768 ** -0.5, - **kwargs - ) - return model - - -def ViT_base_patch16_224(**kwargs): - model = VisionTransformer( - patch_size=16, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_base_patch16_384(**kwargs): - model = VisionTransformer( - img_size=384, - patch_size=16, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_base_patch32_384(**kwargs): - model = VisionTransformer( - img_size=384, - patch_size=32, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_large_patch16_224(**kwargs): - model = VisionTransformer( - patch_size=16, - embed_dim=1024, - depth=24, - num_heads=16, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_large_patch16_384(**kwargs): - model = VisionTransformer( - img_size=384, - patch_size=16, - embed_dim=1024, - depth=24, - num_heads=16, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_large_patch32_384(**kwargs): - model = VisionTransformer( - img_size=384, - patch_size=32, - embed_dim=1024, - depth=24, - num_heads=16, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_huge_patch16_224(**kwargs): - model = VisionTransformer( - patch_size=16, embed_dim=1280, depth=32, num_heads=16, mlp_ratio=4, **kwargs - ) - return model - - -def ViT_huge_patch32_384(**kwargs): - model = VisionTransformer( - img_size=384, - patch_size=32, - embed_dim=1280, - depth=32, - num_heads=16, - mlp_ratio=4, - **kwargs - ) - return model - - -def test_ViT_small_patch16_224(): - load_paddle_module_and_check( - ViT_small_patch16_224, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_xception.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_xception.py deleted file mode 100644 index 16861bca01064696e5014a22b38fd00568edafac..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_xception.py +++ /dev/null @@ -1,348 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math -import sys - -__all__ = ["Xception41", "Xception65", "Xception71"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - bn_name = "bn_" + name - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class SeparableConv(nn.Layer): - def __init__(self, input_channels, output_channels, stride=1, name=None): - super(SeparableConv, self).__init__() - - self._pointwise_conv = ConvBNLayer( - input_channels, output_channels, 1, name=name + "_sep" - ) - self._depthwise_conv = ConvBNLayer( - output_channels, - output_channels, - 3, - stride=stride, - groups=output_channels, - name=name + "_dw", - ) - - def forward(self, inputs): - x = self._pointwise_conv(inputs) - x = self._depthwise_conv(x) - return x - - -class EntryFlowBottleneckBlock(nn.Layer): - def __init__( - self, input_channels, output_channels, stride=2, name=None, relu_first=False - ): - super(EntryFlowBottleneckBlock, self).__init__() - self.relu_first = relu_first - - self._short = Conv2D( - in_channels=input_channels, - out_channels=output_channels, - kernel_size=1, - stride=stride, - padding=0, - weight_attr=ParamAttr(name + "_branch1_weights"), - bias_attr=False, - ) - self._conv1 = SeparableConv( - input_channels, output_channels, stride=1, name=name + "_branch2a_weights" - ) - self._conv2 = SeparableConv( - output_channels, output_channels, stride=1, name=name + "_branch2b_weights" - ) - self._pool = MaxPool2D(kernel_size=3, stride=stride, padding=1) - - def forward(self, inputs): - conv0 = inputs - short = self._short(inputs) - if self.relu_first: - conv0 = F.relu(conv0) - conv1 = self._conv1(conv0) - conv2 = F.relu(conv1) - conv2 = self._conv2(conv2) - pool = self._pool(conv2) - return paddle.add(x=short, y=pool) - - -class EntryFlow(nn.Layer): - def __init__(self, block_num=3): - super(EntryFlow, self).__init__() - - name = "entry_flow" - self.block_num = block_num - self._conv1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name=name + "_conv1") - self._conv2 = ConvBNLayer(32, 64, 3, act="relu", name=name + "_conv2") - if block_num == 3: - self._conv_0 = EntryFlowBottleneckBlock( - 64, 128, stride=2, name=name + "_0", relu_first=False - ) - self._conv_1 = EntryFlowBottleneckBlock( - 128, 256, stride=2, name=name + "_1", relu_first=True - ) - self._conv_2 = EntryFlowBottleneckBlock( - 256, 728, stride=2, name=name + "_2", relu_first=True - ) - elif block_num == 5: - self._conv_0 = EntryFlowBottleneckBlock( - 64, 128, stride=2, name=name + "_0", relu_first=False - ) - self._conv_1 = EntryFlowBottleneckBlock( - 
128, 256, stride=1, name=name + "_1", relu_first=True - ) - self._conv_2 = EntryFlowBottleneckBlock( - 256, 256, stride=2, name=name + "_2", relu_first=True - ) - self._conv_3 = EntryFlowBottleneckBlock( - 256, 728, stride=1, name=name + "_3", relu_first=True - ) - self._conv_4 = EntryFlowBottleneckBlock( - 728, 728, stride=2, name=name + "_4", relu_first=True - ) - else: - sys.exit(-1) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._conv2(x) - - if self.block_num == 3: - x = self._conv_0(x) - x = self._conv_1(x) - x = self._conv_2(x) - elif self.block_num == 5: - x = self._conv_0(x) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._conv_3(x) - x = self._conv_4(x) - return x - - -class MiddleFlowBottleneckBlock(nn.Layer): - def __init__(self, input_channels, output_channels, name): - super(MiddleFlowBottleneckBlock, self).__init__() - - self._conv_0 = SeparableConv( - input_channels, output_channels, stride=1, name=name + "_branch2a_weights" - ) - self._conv_1 = SeparableConv( - output_channels, output_channels, stride=1, name=name + "_branch2b_weights" - ) - self._conv_2 = SeparableConv( - output_channels, output_channels, stride=1, name=name + "_branch2c_weights" - ) - - def forward(self, inputs): - conv0 = F.relu(inputs) - conv0 = self._conv_0(conv0) - conv1 = F.relu(conv0) - conv1 = self._conv_1(conv1) - conv2 = F.relu(conv1) - conv2 = self._conv_2(conv2) - return paddle.add(x=inputs, y=conv2) - - -class MiddleFlow(nn.Layer): - def __init__(self, block_num=8): - super(MiddleFlow, self).__init__() - - self.block_num = block_num - self._conv_0 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_0") - self._conv_1 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_1") - self._conv_2 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_2") - self._conv_3 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_3") - self._conv_4 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_4") - self._conv_5 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_5") - self._conv_6 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_6") - self._conv_7 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_7") - if block_num == 16: - self._conv_8 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_8") - self._conv_9 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_9") - self._conv_10 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_10") - self._conv_11 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_11") - self._conv_12 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_12") - self._conv_13 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_13") - self._conv_14 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_14") - self._conv_15 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_15") - - def forward(self, inputs): - x = self._conv_0(inputs) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._conv_3(x) - x = self._conv_4(x) - x = self._conv_5(x) - x = self._conv_6(x) - x = self._conv_7(x) - if self.block_num == 16: - x = self._conv_8(x) - x = self._conv_9(x) - x = self._conv_10(x) - x = self._conv_11(x) - x = self._conv_12(x) - x = self._conv_13(x) - x = self._conv_14(x) - x = self._conv_15(x) - return x - - -class ExitFlowBottleneckBlock(nn.Layer): - def __init__(self, input_channels, output_channels1, output_channels2, name): - super(ExitFlowBottleneckBlock, self).__init__() - - self._short = Conv2D( - in_channels=input_channels, - out_channels=output_channels2, - kernel_size=1, - stride=2, - 
padding=0, - weight_attr=ParamAttr(name + "_branch1_weights"), - bias_attr=False, - ) - self._conv_1 = SeparableConv( - input_channels, output_channels1, stride=1, name=name + "_branch2a_weights" - ) - self._conv_2 = SeparableConv( - output_channels1, - output_channels2, - stride=1, - name=name + "_branch2b_weights", - ) - self._pool = MaxPool2D(kernel_size=3, stride=2, padding=1) - - def forward(self, inputs): - short = self._short(inputs) - conv0 = F.relu(inputs) - conv1 = self._conv_1(conv0) - conv2 = F.relu(conv1) - conv2 = self._conv_2(conv2) - pool = self._pool(conv2) - return paddle.add(x=short, y=pool) - - -class ExitFlow(nn.Layer): - def __init__(self, class_dim): - super(ExitFlow, self).__init__() - - name = "exit_flow" - - self._conv_0 = ExitFlowBottleneckBlock(728, 728, 1024, name=name + "_1") - self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2") - self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3") - self._pool = AdaptiveAvgPool2D(1) - stdv = 1.0 / math.sqrt(2048 * 1.0) - self._out = Linear( - 2048, - class_dim, - weight_attr=ParamAttr(name="fc_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, inputs): - conv0 = self._conv_0(inputs) - conv1 = self._conv_1(conv0) - conv1 = F.relu(conv1) - conv2 = self._conv_2(conv1) - conv2 = F.relu(conv2) - pool = self._pool(conv2) - pool = paddle.flatten(pool, start_axis=1, stop_axis=-1) - out = self._out(pool) - return out - - -class Xception(nn.Layer): - def __init__(self, entry_flow_block_num=3, middle_flow_block_num=8, class_dim=1000): - super(Xception, self).__init__() - self.entry_flow_block_num = entry_flow_block_num - self.middle_flow_block_num = middle_flow_block_num - self._entry_flow = EntryFlow(entry_flow_block_num) - self._middle_flow = MiddleFlow(middle_flow_block_num) - self._exit_flow = ExitFlow(class_dim) - - def forward(self, inputs): - x = self._entry_flow(inputs) - x = self._middle_flow(x) - x = self._exit_flow(x) - return x - - -def Xception41(**args): - model = Xception(entry_flow_block_num=3, middle_flow_block_num=8, **args) - return model - - -def Xception65(**args): - model = Xception(entry_flow_block_num=3, middle_flow_block_num=16, **args) - return model - - -def Xception71(**args): - model = Xception(entry_flow_block_num=5, middle_flow_block_num=16, **args) - return model - - -def test_Xception41(): - load_paddle_module_and_check( - Xception41, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/paddle2oneflow/code_gen/test_xception_deeplab.py b/examples/x2oneflow/paddle2oneflow/code_gen/test_xception_deeplab.py deleted file mode 100644 index d0310d12cd413e09ec1bf0d38b83a61ac8c67d76..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/code_gen/test_xception_deeplab.py +++ /dev/null @@ -1,448 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D - -__all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def check_data(data, number): - if type(data) == int: - return [data] * number - assert len(data) == number - return data - - -def check_stride(s, os): - if s <= os: - return True - else: - return False - - -def check_points(count, points): - if points is None: - return False - else: - if isinstance(points, list): - return True if count in points else False - else: - return True if count == points else False - - -def gen_bottleneck_params(backbone="xception_65"): - if backbone == "xception_65": - bottleneck_params = { - "entry_flow": (3, [2, 2, 2], [128, 256, 728]), - "middle_flow": (16, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]), - } - elif backbone == "xception_41": - bottleneck_params = { - "entry_flow": (3, [2, 2, 2], [128, 256, 728]), - "middle_flow": (8, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]), - } - elif backbone == "xception_71": - bottleneck_params = { - "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]), - "middle_flow": (16, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]), - } - else: - raise Exception( - "xception backbont only support xception_41/xception_65/xception_71" - ) - return bottleneck_params - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - filter_size, - stride=1, - padding=0, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=input_channels, - out_channels=output_channels, - kernel_size=filter_size, - stride=stride, - padding=padding, - weight_attr=ParamAttr(name=name + "/weights"), - bias_attr=False, - ) - self._bn = BatchNorm( - num_channels=output_channels, - act=act, - epsilon=1e-3, - momentum=0.99, - param_attr=ParamAttr(name=name + "/BatchNorm/gamma"), - bias_attr=ParamAttr(name=name + "/BatchNorm/beta"), - moving_mean_name=name + "/BatchNorm/moving_mean", - moving_variance_name=name + "/BatchNorm/moving_variance", - ) - - def forward(self, inputs): - return self._bn(self._conv(inputs)) - - -class Seperate_Conv(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - stride, - filter, - dilation=1, - act=None, - name=None, - ): - super(Seperate_Conv, self).__init__() - - self._conv1 = Conv2D( - in_channels=input_channels, - out_channels=input_channels, - kernel_size=filter, - stride=stride, - groups=input_channels, - padding=(filter) // 2 * dilation, - dilation=dilation, - weight_attr=ParamAttr(name=name + "/depthwise/weights"), - bias_attr=False, - ) - self._bn1 = BatchNorm( - input_channels, - act=act, - epsilon=1e-3, - momentum=0.99, - param_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"), - bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta"), - moving_mean_name=name + "/depthwise/BatchNorm/moving_mean", - moving_variance_name=name + "/depthwise/BatchNorm/moving_variance", - ) - self._conv2 = Conv2D( - input_channels, - output_channels, - 1, - stride=1, - groups=1, - padding=0, - weight_attr=ParamAttr(name=name + "/pointwise/weights"), - bias_attr=False, - ) - self._bn2 = BatchNorm( - output_channels, - act=act, - epsilon=1e-3, - 
momentum=0.99, - param_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"), - bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta"), - moving_mean_name=name + "/pointwise/BatchNorm/moving_mean", - moving_variance_name=name + "/pointwise/BatchNorm/moving_variance", - ) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._bn1(x) - x = self._conv2(x) - x = self._bn2(x) - return x - - -class Xception_Block(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - strides=1, - filter_size=3, - dilation=1, - skip_conv=True, - has_skip=True, - activation_fn_in_separable_conv=False, - name=None, - ): - super(Xception_Block, self).__init__() - - repeat_number = 3 - output_channels = check_data(output_channels, repeat_number) - filter_size = check_data(filter_size, repeat_number) - strides = check_data(strides, repeat_number) - - self.has_skip = has_skip - self.skip_conv = skip_conv - self.activation_fn_in_separable_conv = activation_fn_in_separable_conv - if not activation_fn_in_separable_conv: - self._conv1 = Seperate_Conv( - input_channels, - output_channels[0], - stride=strides[0], - filter=filter_size[0], - dilation=dilation, - name=name + "/separable_conv1", - ) - self._conv2 = Seperate_Conv( - output_channels[0], - output_channels[1], - stride=strides[1], - filter=filter_size[1], - dilation=dilation, - name=name + "/separable_conv2", - ) - self._conv3 = Seperate_Conv( - output_channels[1], - output_channels[2], - stride=strides[2], - filter=filter_size[2], - dilation=dilation, - name=name + "/separable_conv3", - ) - else: - self._conv1 = Seperate_Conv( - input_channels, - output_channels[0], - stride=strides[0], - filter=filter_size[0], - act="relu", - dilation=dilation, - name=name + "/separable_conv1", - ) - self._conv2 = Seperate_Conv( - output_channels[0], - output_channels[1], - stride=strides[1], - filter=filter_size[1], - act="relu", - dilation=dilation, - name=name + "/separable_conv2", - ) - self._conv3 = Seperate_Conv( - output_channels[1], - output_channels[2], - stride=strides[2], - filter=filter_size[2], - act="relu", - dilation=dilation, - name=name + "/separable_conv3", - ) - - if has_skip and skip_conv: - self._short = ConvBNLayer( - input_channels, - output_channels[-1], - 1, - stride=strides[-1], - padding=0, - name=name + "/shortcut", - ) - - def forward(self, inputs): - if not self.activation_fn_in_separable_conv: - x = F.relu(inputs) - x = self._conv1(x) - x = F.relu(x) - x = self._conv2(x) - x = F.relu(x) - x = self._conv3(x) - else: - x = self._conv1(inputs) - x = self._conv2(x) - x = self._conv3(x) - if self.has_skip: - if self.skip_conv: - skip = self._short(inputs) - else: - skip = inputs - return paddle.add(x, skip) - else: - return x - - -class XceptionDeeplab(nn.Layer): - def __init__(self, backbone, class_dim=1000): - super(XceptionDeeplab, self).__init__() - - bottleneck_params = gen_bottleneck_params(backbone) - self.backbone = backbone - - self._conv1 = ConvBNLayer( - 3, - 32, - 3, - stride=2, - padding=1, - act="relu", - name=self.backbone + "/entry_flow/conv1", - ) - self._conv2 = ConvBNLayer( - 32, - 64, - 3, - stride=1, - padding=1, - act="relu", - name=self.backbone + "/entry_flow/conv2", - ) - - self.block_num = bottleneck_params["entry_flow"][0] - self.strides = bottleneck_params["entry_flow"][1] - self.chns = bottleneck_params["entry_flow"][2] - self.strides = check_data(self.strides, self.block_num) - self.chns = check_data(self.chns, self.block_num) - - self.entry_flow = [] - self.middle_flow = [] - - 
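The constructor loop that follows gates each block's stride with `check_stride`: a block keeps its nominal stride only while the cumulative downsampling stays within `output_stride`, otherwise the stride is forced to 1. A minimal standalone sketch of that gating rule, for illustration only (`plan_strides` is a name chosen here, not part of the original file):

```python
def plan_strides(block_strides, output_stride, s=2):
    """Mirror check_stride(): keep a block's stride only while the
    cumulative stride s would stay within output_stride."""
    planned = []
    for bs in block_strides:
        stride = bs if s * bs <= output_stride else 1  # gate the downsampling
        planned.append(stride)
        s *= stride  # accumulate the effective stride
    return planned, s

# entry_flow strides for xception_65 with output_stride=32: all kept.
assert plan_strides([2, 2, 2], 32) == ([2, 2, 2], 16)
# With a tighter output_stride=8, the last downsampling is suppressed.
assert plan_strides([2, 2, 2], 8) == ([2, 2, 1], 8)
```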
self.stride = 2 - self.output_stride = 32 - s = self.stride - - for i in range(self.block_num): - stride = ( - self.strides[i] - if check_stride(s * self.strides[i], self.output_stride) - else 1 - ) - xception_block = self.add_sublayer( - self.backbone + "/entry_flow/block" + str(i + 1), - Xception_Block( - input_channels=64 if i == 0 else self.chns[i - 1], - output_channels=self.chns[i], - strides=[1, 1, self.stride], - name=self.backbone + "/entry_flow/block" + str(i + 1), - ), - ) - self.entry_flow.append(xception_block) - s = s * stride - self.stride = s - - self.block_num = bottleneck_params["middle_flow"][0] - self.strides = bottleneck_params["middle_flow"][1] - self.chns = bottleneck_params["middle_flow"][2] - self.strides = check_data(self.strides, self.block_num) - self.chns = check_data(self.chns, self.block_num) - s = self.stride - - for i in range(self.block_num): - stride = ( - self.strides[i] - if check_stride(s * self.strides[i], self.output_stride) - else 1 - ) - xception_block = self.add_sublayer( - self.backbone + "/middle_flow/block" + str(i + 1), - Xception_Block( - input_channels=728, - output_channels=728, - strides=[1, 1, self.strides[i]], - skip_conv=False, - name=self.backbone + "/middle_flow/block" + str(i + 1), - ), - ) - self.middle_flow.append(xception_block) - s = s * stride - self.stride = s - - self.block_num = bottleneck_params["exit_flow"][0] - self.strides = bottleneck_params["exit_flow"][1] - self.chns = bottleneck_params["exit_flow"][2] - self.strides = check_data(self.strides, self.block_num) - self.chns = check_data(self.chns, self.block_num) - s = self.stride - stride = ( - self.strides[0] - if check_stride(s * self.strides[0], self.output_stride) - else 1 - ) - self._exit_flow_1 = Xception_Block( - 728, self.chns[0], [1, 1, stride], name=self.backbone + "/exit_flow/block1" - ) - s = s * stride - stride = ( - self.strides[1] - if check_stride(s * self.strides[1], self.output_stride) - else 1 - ) - self._exit_flow_2 = Xception_Block( - self.chns[0][-1], - self.chns[1], - [1, 1, stride], - dilation=2, - has_skip=False, - activation_fn_in_separable_conv=True, - name=self.backbone + "/exit_flow/block2", - ) - s = s * stride - - self.stride = s - - self._drop = Dropout(p=0.5, mode="downscale_in_infer") - self._pool = AdaptiveAvgPool2D(1) - self._fc = Linear( - self.chns[1][-1], - class_dim, - weight_attr=ParamAttr(name="fc_weights"), - bias_attr=ParamAttr(name="fc_bias"), - ) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._conv2(x) - for ef in self.entry_flow: - x = ef(x) - for mf in self.middle_flow: - x = mf(x) - x = self._exit_flow_1(x) - x = self._exit_flow_2(x) - x = self._drop(x) - x = self._pool(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x - - -def Xception41_deeplab(**args): - model = XceptionDeeplab("xception_41", **args) - return model - - -def Xception65_deeplab(**args): - model = XceptionDeeplab("xception_65", **args) - return model - - -def Xception71_deeplab(**args): - model = XceptionDeeplab("xception_71", **args) - return model - - -def test_Xception41_deeplab(): - load_paddle_module_and_check( - Xception41_deeplab, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=False - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_alexnet.py b/examples/x2oneflow/paddle2oneflow/models/test_alexnet.py deleted file mode 100644 index a614e6001f7545f23cd92dbe152ad1964c08abcc..0000000000000000000000000000000000000000 --- 
a/examples/x2oneflow/paddle2oneflow/models/test_alexnet.py +++ /dev/null @@ -1,157 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -# https://github.com/PaddlePaddle/PaddleClas/blob/develop/ppcls/modeling/architectures/alexnet.py -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout, ReLU -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = ["AlexNet"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvPoolLayer(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - filter_size, - stride, - padding, - stdv, - groups=1, - act=None, - name=None, - ): - super(ConvPoolLayer, self).__init__() - - self.relu = ReLU() if act == "relu" else None - - self._conv = Conv2D( - in_channels=input_channels, - out_channels=output_channels, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr( - name=name + "_weights", initializer=Uniform(-stdv, stdv) - ), - bias_attr=ParamAttr( - name=name + "_offset", initializer=Uniform(-stdv, stdv) - ), - ) - self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) - - def forward(self, inputs): - x = self._conv(inputs) - if self.relu is not None: - x = self.relu(x) - x = self._pool(x) - return x - - -class AlexNetDY(nn.Layer): - def __init__(self, class_dim=1000): - super(AlexNetDY, self).__init__() - - stdv = 1.0 / math.sqrt(3 * 11 * 11) - self._conv1 = ConvPoolLayer(3, 64, 11, 4, 2, stdv, act="relu", name="conv1") - stdv = 1.0 / math.sqrt(64 * 5 * 5) - self._conv2 = ConvPoolLayer(64, 192, 5, 1, 2, stdv, act="relu", name="conv2") - stdv = 1.0 / math.sqrt(192 * 3 * 3) - self._conv3 = Conv2D( - 192, - 384, - 3, - stride=1, - padding=1, - weight_attr=ParamAttr( - name="conv3_weights", initializer=Uniform(-stdv, stdv) - ), - bias_attr=ParamAttr(name="conv3_offset", initializer=Uniform(-stdv, stdv)), - ) - stdv = 1.0 / math.sqrt(384 * 3 * 3) - self._conv4 = Conv2D( - 384, - 256, - 3, - stride=1, - padding=1, - weight_attr=ParamAttr( - name="conv4_weights", initializer=Uniform(-stdv, stdv) - ), - bias_attr=ParamAttr(name="conv4_offset", initializer=Uniform(-stdv, stdv)), - ) - stdv = 1.0 / math.sqrt(256 * 3 * 3) - self._conv5 = ConvPoolLayer(256, 256, 3, 1, 1, stdv, act="relu", name="conv5") - stdv = 1.0 / math.sqrt(256 * 6 * 6) - - self._drop1 = Dropout(p=0.5, mode="downscale_in_infer") - self._fc6 = Linear( - in_features=256 * 6 * 6, - out_features=4096, - weight_attr=ParamAttr(name="fc6_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc6_offset", initializer=Uniform(-stdv, stdv)), - ) - - self._drop2 = Dropout(p=0.5, mode="downscale_in_infer") - self._fc7 = Linear( - in_features=4096, - out_features=4096, - weight_attr=ParamAttr(name="fc7_weights", initializer=Uniform(-stdv, stdv)), - 
bias_attr=ParamAttr(name="fc7_offset", initializer=Uniform(-stdv, stdv)), - ) - self._fc8 = Linear( - in_features=4096, - out_features=class_dim, - weight_attr=ParamAttr(name="fc8_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc8_offset", initializer=Uniform(-stdv, stdv)), - ) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._conv2(x) - x = self._conv3(x) - x = F.relu(x) - x = self._conv4(x) - x = F.relu(x) - x = self._conv5(x) - x = paddle.flatten(x, start_axis=1, stop_axis=-1) - x = self._drop1(x) - x = self._fc6(x) - x = F.relu(x) - x = self._drop2(x) - x = self._fc7(x) - x = F.relu(x) - x = self._fc8(x) - return x - - -def AlexNet(**args): - model = AlexNetDY(**args) - return model - - -def test_alexnet(): - load_paddle_module_and_check( - AlexNet, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_darknet.py b/examples/x2oneflow/paddle2oneflow/models/test_darknet.py deleted file mode 100644 index 37575c98fdd0f5a4d19eabb691fb6693cdbed95f..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_darknet.py +++ /dev/null @@ -1,180 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -# https://github.com/PaddlePaddle/PaddleClas/blob/develop/ppcls/modeling/architectures/darknet.py -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = ["DarkNet53"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, input_channels, output_channels, filter_size, stride, padding, name=None - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=input_channels, - out_channels=output_channels, - kernel_size=filter_size, - stride=stride, - padding=padding, - weight_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False, - ) - - bn_name = name + ".bn" - self._bn = BatchNorm( - num_channels=output_channels, - act="relu", - param_attr=ParamAttr(name=bn_name + ".scale"), - bias_attr=ParamAttr(name=bn_name + ".offset"), - moving_mean_name=bn_name + ".mean", - moving_variance_name=bn_name + ".var", - ) - - def forward(self, inputs): - x = self._conv(inputs) - x = self._bn(x) - return x - - -class BasicBlock(nn.Layer): - def __init__(self, input_channels, output_channels, name=None): - super(BasicBlock, self).__init__() - - self._conv1 = ConvBNLayer( - input_channels, output_channels, 1, 1, 0, name=name + ".0" - ) - self._conv2 = ConvBNLayer( - output_channels, output_channels * 2, 3, 1, 1, name=name + ".1" - ) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._conv2(x) - return paddle.add(x=inputs, y=x) - - -class DarkNet(nn.Layer): - def __init__(self, class_dim=1000): - super(DarkNet, self).__init__() - - self.stages = [1, 2, 8, 8, 4] - self._conv1 = ConvBNLayer(3, 32, 3, 1, 1, name="yolo_input") - self._conv2 = ConvBNLayer(32, 64, 3, 2, 1, name="yolo_input.downsample") - - self._basic_block_01 = BasicBlock(64, 32, name="stage.0.0") - self._downsample_0 = ConvBNLayer(64, 128, 3, 2, 1, name="stage.0.downsample") - - self._basic_block_11 = BasicBlock(128, 64, name="stage.1.0") - self._basic_block_12 = BasicBlock(128, 64, name="stage.1.1") - self._downsample_1 = ConvBNLayer(128, 256, 3, 2, 1, name="stage.1.downsample") - - self._basic_block_21 = BasicBlock(256, 128, name="stage.2.0") - self._basic_block_22 = BasicBlock(256, 128, name="stage.2.1") - self._basic_block_23 = BasicBlock(256, 128, name="stage.2.2") - self._basic_block_24 = BasicBlock(256, 128, name="stage.2.3") - self._basic_block_25 = BasicBlock(256, 128, name="stage.2.4") - self._basic_block_26 = BasicBlock(256, 128, name="stage.2.5") - self._basic_block_27 = BasicBlock(256, 128, name="stage.2.6") - self._basic_block_28 = BasicBlock(256, 128, name="stage.2.7") - self._downsample_2 = ConvBNLayer(256, 512, 3, 2, 1, name="stage.2.downsample") - - self._basic_block_31 = BasicBlock(512, 256, name="stage.3.0") - self._basic_block_32 = BasicBlock(512, 256, name="stage.3.1") - self._basic_block_33 = BasicBlock(512, 256, name="stage.3.2") - self._basic_block_34 = BasicBlock(512, 256, name="stage.3.3") - self._basic_block_35 = BasicBlock(512, 256, name="stage.3.4") - self._basic_block_36 = BasicBlock(512, 256, name="stage.3.5") - self._basic_block_37 = BasicBlock(512, 256, name="stage.3.6") - self._basic_block_38 = BasicBlock(512, 256, name="stage.3.7") - self._downsample_3 = ConvBNLayer(512, 1024, 3, 2, 1, name="stage.3.downsample") - - self._basic_block_41 = BasicBlock(1024, 
512, name="stage.4.0") - self._basic_block_42 = BasicBlock(1024, 512, name="stage.4.1") - self._basic_block_43 = BasicBlock(1024, 512, name="stage.4.2") - self._basic_block_44 = BasicBlock(1024, 512, name="stage.4.3") - - self._pool = AdaptiveAvgPool2D(1) - - stdv = 1.0 / math.sqrt(1024.0) - self._out = Linear( - 1024, - class_dim, - weight_attr=ParamAttr(name="fc_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._conv2(x) - - x = self._basic_block_01(x) - x = self._downsample_0(x) - - x = self._basic_block_11(x) - x = self._basic_block_12(x) - x = self._downsample_1(x) - - x = self._basic_block_21(x) - x = self._basic_block_22(x) - x = self._basic_block_23(x) - x = self._basic_block_24(x) - x = self._basic_block_25(x) - x = self._basic_block_26(x) - x = self._basic_block_27(x) - x = self._basic_block_28(x) - x = self._downsample_2(x) - - x = self._basic_block_31(x) - x = self._basic_block_32(x) - x = self._basic_block_33(x) - x = self._basic_block_34(x) - x = self._basic_block_35(x) - x = self._basic_block_36(x) - x = self._basic_block_37(x) - x = self._basic_block_38(x) - x = self._downsample_3(x) - - x = self._basic_block_41(x) - x = self._basic_block_42(x) - x = self._basic_block_43(x) - x = self._basic_block_44(x) - - x = self._pool(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._out(x) - return x - - -def DarkNet53(**args): - model = DarkNet(**args) - return model - - -def test_darknet(): - load_paddle_module_and_check( - DarkNet53, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_densenet.py b/examples/x2oneflow/paddle2oneflow/models/test_densenet.py deleted file mode 100644 index 5b60a90ea8f40f9394fb8cf26a827e2a0a339753..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_densenet.py +++ /dev/null @@ -1,332 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = ["DenseNet121", "DenseNet161", "DenseNet169", "DenseNet201", "DenseNet264"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class BNACConvLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - pad=0, - groups=1, - act="relu", - name=None, - ): - super(BNACConvLayer, self).__init__() - - self._batch_norm = BatchNorm( - num_channels, - act=act, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=pad, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - - def forward(self, input): - y = self._batch_norm(input) - y = self._conv(y) - return y - - -class DenseLayer(nn.Layer): - def __init__(self, num_channels, growth_rate, bn_size, dropout, name=None): - super(DenseLayer, self).__init__() - self.dropout = dropout - - self.bn_ac_func1 = BNACConvLayer( - num_channels=num_channels, - num_filters=bn_size * growth_rate, - filter_size=1, - pad=0, - stride=1, - name=name + "_x1", - ) - - self.bn_ac_func2 = BNACConvLayer( - num_channels=bn_size * growth_rate, - num_filters=growth_rate, - filter_size=3, - pad=1, - stride=1, - name=name + "_x2", - ) - - if dropout: - self.dropout_func = Dropout(p=dropout, mode="downscale_in_infer") - - def forward(self, input): - conv = self.bn_ac_func1(input) - conv = self.bn_ac_func2(conv) - if self.dropout: - conv = self.dropout_func(conv) - conv = paddle.concat([input, conv], axis=1) - return conv - - -class DenseBlock(nn.Layer): - def __init__( - self, num_channels, num_layers, bn_size, growth_rate, dropout, name=None - ): - super(DenseBlock, self).__init__() - self.dropout = dropout - - self.dense_layer_func = [] - - pre_channel = num_channels - for layer in range(num_layers): - self.dense_layer_func.append( - self.add_sublayer( - "{}_{}".format(name, layer + 1), - DenseLayer( - num_channels=pre_channel, - growth_rate=growth_rate, - bn_size=bn_size, - dropout=dropout, - name=name + "_" + str(layer + 1), - ), - ) - ) - pre_channel = pre_channel + growth_rate - - def forward(self, input): - conv = input - for func in self.dense_layer_func: - conv = func(conv) - return conv - - -class TransitionLayer(nn.Layer): - def __init__(self, num_channels, num_output_features, name=None): - super(TransitionLayer, self).__init__() - - self.conv_ac_func = BNACConvLayer( - num_channels=num_channels, - num_filters=num_output_features, - filter_size=1, - pad=0, - stride=1, - name=name, - ) - - self.pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0) - - def forward(self, input): - y = self.conv_ac_func(input) - y = self.pool2d_avg(y) - return y - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - pad=0, - groups=1, - act="relu", - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=pad, - groups=groups, - 
weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, input): - y = self._conv(input) - y = self._batch_norm(y) - return y - - -class DenseNet(nn.Layer): - def __init__(self, layers=60, bn_size=4, dropout=0, class_dim=1000): - super(DenseNet, self).__init__() - - supported_layers = [121, 161, 169, 201, 264] - assert ( - layers in supported_layers - ), "supported layers are {} but input layer is {}".format( - supported_layers, layers - ) - densenet_spec = { - 121: (64, 32, [6, 12, 24, 16]), - 161: (96, 48, [6, 12, 36, 24]), - 169: (64, 32, [6, 12, 32, 32]), - 201: (64, 32, [6, 12, 48, 32]), - 264: (64, 32, [6, 12, 64, 48]), - } - num_init_features, growth_rate, block_config = densenet_spec[layers] - - self.conv1_func = ConvBNLayer( - num_channels=3, - num_filters=num_init_features, - filter_size=7, - stride=2, - pad=3, - act="relu", - name="conv1", - ) - - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.block_config = block_config - - self.dense_block_func_list = [] - self.transition_func_list = [] - pre_num_channels = num_init_features - num_features = num_init_features - for i, num_layers in enumerate(block_config): - self.dense_block_func_list.append( - self.add_sublayer( - "db_conv_{}".format(i + 2), - DenseBlock( - num_channels=pre_num_channels, - num_layers=num_layers, - bn_size=bn_size, - growth_rate=growth_rate, - dropout=dropout, - name="conv" + str(i + 2), - ), - ) - ) - - num_features = num_features + num_layers * growth_rate - pre_num_channels = num_features - - if i != len(block_config) - 1: - self.transition_func_list.append( - self.add_sublayer( - "tr_conv{}_blk".format(i + 2), - TransitionLayer( - num_channels=pre_num_channels, - num_output_features=num_features // 2, - name="conv" + str(i + 2) + "_blk", - ), - ) - ) - pre_num_channels = num_features // 2 - num_features = num_features // 2 - - self.batch_norm = BatchNorm( - num_features, - act="relu", - param_attr=ParamAttr(name="conv5_blk_bn_scale"), - bias_attr=ParamAttr(name="conv5_blk_bn_offset"), - moving_mean_name="conv5_blk_bn_mean", - moving_variance_name="conv5_blk_bn_variance", - ) - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - stdv = 1.0 / math.sqrt(num_features * 1.0) - - self.out = Linear( - num_features, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, input): - conv = self.conv1_func(input) - conv = self.pool2d_max(conv) - - for i, num_layers in enumerate(self.block_config): - conv = self.dense_block_func_list[i](conv) - if i != len(self.block_config) - 1: - conv = self.transition_func_list[i](conv) - - conv = self.batch_norm(conv) - y = self.pool2d_avg(conv) - y = paddle.flatten(y, start_axis=1, stop_axis=-1) - # y = paddle.reshape(y, [y.shape[0], -1]) - y = self.out(y) - return y - - -def DenseNet121(**args): - model = DenseNet(layers=121, **args) - return model - - -def DenseNet161(**args): - model = DenseNet(layers=161, **args) - return model - - -def DenseNet169(**args): - model = DenseNet(layers=169, **args) - return model - - -def DenseNet201(**args): - model = DenseNet(layers=201, **args) - return model - - -def DenseNet264(**args): - model = DenseNet(layers=264, **args) - return model - - -def 
test_densenet121(): - load_paddle_module_and_check( - DenseNet121, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_dpn.py b/examples/x2oneflow/paddle2oneflow/models/test_dpn.py deleted file mode 100644 index 8b1c908b59bde1270e464a732bf67558d5c08394..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_dpn.py +++ /dev/null @@ -1,452 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import numpy as np -import sys -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2D, BatchNorm, Linear -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = [ - "DPN", - "DPN68", - "DPN92", - "DPN98", - "DPN107", - "DPN131", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - pad=0, - groups=1, - act="relu", - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=pad, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, input): - y = self._conv(input) - y = self._batch_norm(y) - return y - - -class BNACConvLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - pad=0, - groups=1, - act="relu", - name=None, - ): - super(BNACConvLayer, self).__init__() - self.num_channels = num_channels - - self._batch_norm = BatchNorm( - num_channels, - act=act, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=pad, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - - def forward(self, input): - y = self._batch_norm(input) - y = self._conv(y) - return y - - -class DualPathFactory(nn.Layer): - def __init__( - self, - num_channels, - num_1x1_a, - num_3x3_b, - num_1x1_c, - inc, - G, - _type="normal", - name=None, - ): - super(DualPathFactory, self).__init__() - - self.num_1x1_c = num_1x1_c - self.inc = inc - self.name = name - - kw = 3 - kh = 3 - pw = (kw - 1) // 2 - ph = (kh - 1) // 2 - - # type - if _type == "proj": - key_stride = 1 - self.has_proj = True - elif _type == "down": - key_stride = 2 - self.has_proj = True - 
elif _type == "normal": - key_stride = 1 - self.has_proj = False - else: - print("not implemented now!!!") - sys.exit(1) - - data_in_ch = ( - sum(num_channels) if isinstance(num_channels, list) else num_channels - ) - - if self.has_proj: - self.c1x1_w_func = BNACConvLayer( - num_channels=data_in_ch, - num_filters=num_1x1_c + 2 * inc, - filter_size=(1, 1), - pad=(0, 0), - stride=(key_stride, key_stride), - name=name + "_match", - ) - - self.c1x1_a_func = BNACConvLayer( - num_channels=data_in_ch, - num_filters=num_1x1_a, - filter_size=(1, 1), - pad=(0, 0), - name=name + "_conv1", - ) - - self.c3x3_b_func = BNACConvLayer( - num_channels=num_1x1_a, - num_filters=num_3x3_b, - filter_size=(kw, kh), - pad=(pw, ph), - stride=(key_stride, key_stride), - groups=G, - name=name + "_conv2", - ) - - self.c1x1_c_func = BNACConvLayer( - num_channels=num_3x3_b, - num_filters=num_1x1_c + inc, - filter_size=(1, 1), - pad=(0, 0), - name=name + "_conv3", - ) - - def forward(self, input): - # PROJ - if isinstance(input, list): - data_in = paddle.concat([input[0], input[1]], axis=1) - else: - data_in = input - - if self.has_proj: - c1x1_w = self.c1x1_w_func(data_in) - data_o1, data_o2 = paddle.split( - c1x1_w, num_or_sections=[self.num_1x1_c, 2 * self.inc], axis=1 - ) - else: - data_o1 = input[0] - data_o2 = input[1] - - c1x1_a = self.c1x1_a_func(data_in) - c3x3_b = self.c3x3_b_func(c1x1_a) - c1x1_c = self.c1x1_c_func(c3x3_b) - - c1x1_c1, c1x1_c2 = paddle.split( - c1x1_c, num_or_sections=[self.num_1x1_c, self.inc], axis=1 - ) - - # OUTPUTS - summ = paddle.add(x=data_o1, y=c1x1_c1) - dense = paddle.concat([data_o2, c1x1_c2], axis=1) - # tensor, channels - return [summ, dense] - - -class DPN(nn.Layer): - def __init__(self, layers=68, class_dim=1000): - super(DPN, self).__init__() - - self._class_dim = class_dim - - args = self.get_net_args(layers) - bws = args["bw"] - inc_sec = args["inc_sec"] - rs = args["r"] - k_r = args["k_r"] - k_sec = args["k_sec"] - G = args["G"] - init_num_filter = args["init_num_filter"] - init_filter_size = args["init_filter_size"] - init_padding = args["init_padding"] - - self.k_sec = k_sec - - self.conv1_x_1_func = ConvBNLayer( - num_channels=3, - num_filters=init_num_filter, - filter_size=init_filter_size, - stride=2, - pad=init_padding, - act="relu", - name="conv1", - ) - - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) - - num_channel_dpn = init_num_filter - - self.dpn_func_list = [] - # conv2 - conv5 - match_list, num = [], 0 - for gc in range(4): - bw = bws[gc] - inc = inc_sec[gc] - R = (k_r * bw) // rs[gc] - if gc == 0: - _type1 = "proj" - _type2 = "normal" - match = 1 - else: - _type1 = "down" - _type2 = "normal" - match = match + k_sec[gc - 1] - match_list.append(match) - self.dpn_func_list.append( - self.add_sublayer( - "dpn{}".format(match), - DualPathFactory( - num_channels=num_channel_dpn, - num_1x1_a=R, - num_3x3_b=R, - num_1x1_c=bw, - inc=inc, - G=G, - _type=_type1, - name="dpn" + str(match), - ), - ) - ) - num_channel_dpn = [bw, 3 * inc] - - for i_ly in range(2, k_sec[gc] + 1): - num += 1 - if num in match_list: - num += 1 - self.dpn_func_list.append( - self.add_sublayer( - "dpn{}".format(num), - DualPathFactory( - num_channels=num_channel_dpn, - num_1x1_a=R, - num_3x3_b=R, - num_1x1_c=bw, - inc=inc, - G=G, - _type=_type2, - name="dpn" + str(num), - ), - ) - ) - - num_channel_dpn = [num_channel_dpn[0], num_channel_dpn[1] + inc] - - out_channel = sum(num_channel_dpn) - - self.conv5_x_x_bn = BatchNorm( - num_channels=sum(num_channel_dpn), - act="relu", - 
param_attr=ParamAttr(name="final_concat_bn_scale"), - bias_attr=ParamAttr("final_concat_bn_offset"), - moving_mean_name="final_concat_bn_mean", - moving_variance_name="final_concat_bn_variance", - ) - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - stdv = 0.01 - - self.out = Linear( - out_channel, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, input): - conv1_x_1 = self.conv1_x_1_func(input) - convX_x_x = self.pool2d_max(conv1_x_1) - - dpn_idx = 0 - for gc in range(4): - convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x) - dpn_idx += 1 - for i_ly in range(2, self.k_sec[gc] + 1): - convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x) - dpn_idx += 1 - - conv5_x_x = paddle.concat(convX_x_x, axis=1) - conv5_x_x = self.conv5_x_x_bn(conv5_x_x) - - y = self.pool2d_avg(conv5_x_x) - y = paddle.flatten(y, start_axis=1, stop_axis=-1) - y = self.out(y) - return y - - def get_net_args(self, layers): - if layers == 68: - k_r = 128 - G = 32 - k_sec = [3, 4, 12, 3] - inc_sec = [16, 32, 32, 64] - bw = [64, 128, 256, 512] - r = [64, 64, 64, 64] - init_num_filter = 10 - init_filter_size = 3 - init_padding = 1 - elif layers == 92: - k_r = 96 - G = 32 - k_sec = [3, 4, 20, 3] - inc_sec = [16, 32, 24, 128] - bw = [256, 512, 1024, 2048] - r = [256, 256, 256, 256] - init_num_filter = 64 - init_filter_size = 7 - init_padding = 3 - elif layers == 98: - k_r = 160 - G = 40 - k_sec = [3, 6, 20, 3] - inc_sec = [16, 32, 32, 128] - bw = [256, 512, 1024, 2048] - r = [256, 256, 256, 256] - init_num_filter = 96 - init_filter_size = 7 - init_padding = 3 - elif layers == 107: - k_r = 200 - G = 50 - k_sec = [4, 8, 20, 3] - inc_sec = [20, 64, 64, 128] - bw = [256, 512, 1024, 2048] - r = [256, 256, 256, 256] - init_num_filter = 128 - init_filter_size = 7 - init_padding = 3 - elif layers == 131: - k_r = 160 - G = 40 - k_sec = [4, 8, 28, 3] - inc_sec = [16, 32, 32, 128] - bw = [256, 512, 1024, 2048] - r = [256, 256, 256, 256] - init_num_filter = 128 - init_filter_size = 7 - init_padding = 3 - else: - raise NotImplementedError - net_arg = { - "k_r": k_r, - "G": G, - "k_sec": k_sec, - "inc_sec": inc_sec, - "bw": bw, - "r": r, - } - net_arg["init_num_filter"] = init_num_filter - net_arg["init_filter_size"] = init_filter_size - net_arg["init_padding"] = init_padding - - return net_arg - - -def DPN68(**args): - model = DPN(layers=68, **args) - return model - - -def DPN92(**args): - model = DPN(layers=92, **args) - return model - - -def DPN98(**args): - model = DPN(layers=98, **args) - return model - - -def DPN107(**args): - model = DPN(layers=107, **args) - return model - - -def DPN131(**args): - model = DPN(layers=131, **args) - return model - - -def test_dpn68(): - load_paddle_module_and_check( - DPN68, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_efficientnet.py b/examples/x2oneflow/paddle2oneflow/models/test_efficientnet.py deleted file mode 100644 index 144cb58bb306dfad8f83045cc95d1e513f9dc841..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_efficientnet.py +++ /dev/null @@ -1,975 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -import math -import collections -import re -import copy - -__all__ = [ - "EfficientNet", - "EfficientNetB0_small", - "EfficientNetB0", - "EfficientNetB1", - "EfficientNetB2", - "EfficientNetB3", - "EfficientNetB4", - "EfficientNetB5", - "EfficientNetB6", - "EfficientNetB7", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -GlobalParams = collections.namedtuple( - "GlobalParams", - [ - "batch_norm_momentum", - "batch_norm_epsilon", - "dropout_rate", - "num_classes", - "width_coefficient", - "depth_coefficient", - "depth_divisor", - "min_depth", - "drop_connect_rate", - ], -) - -BlockArgs = collections.namedtuple( - "BlockArgs", - [ - "kernel_size", - "num_repeat", - "input_filters", - "output_filters", - "expand_ratio", - "id_skip", - "stride", - "se_ratio", - ], -) - -GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) - - -def efficientnet_params(model_name): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - "efficientnet-b0": (1.0, 1.0, 224, 0.2), - "efficientnet-b1": (1.0, 1.1, 240, 0.2), - "efficientnet-b2": (1.1, 1.2, 260, 0.3), - "efficientnet-b3": (1.2, 1.4, 300, 0.3), - "efficientnet-b4": (1.4, 1.8, 380, 0.4), - "efficientnet-b5": (1.6, 2.2, 456, 0.4), - "efficientnet-b6": (1.8, 2.6, 528, 0.5), - "efficientnet-b7": (2.0, 3.1, 600, 0.5), - } - return params_dict[model_name] - - -def efficientnet( - width_coefficient=None, - depth_coefficient=None, - dropout_rate=0.2, - drop_connect_rate=0.2, -): - """ Get block arguments according to parameter and coefficients. 
""" - blocks_args = [ - "r1_k3_s11_e1_i32_o16_se0.25", - "r2_k3_s22_e6_i16_o24_se0.25", - "r2_k5_s22_e6_i24_o40_se0.25", - "r3_k3_s22_e6_i40_o80_se0.25", - "r3_k5_s11_e6_i80_o112_se0.25", - "r4_k5_s22_e6_i112_o192_se0.25", - "r1_k3_s11_e6_i192_o320_se0.25", - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None, - ) - - return blocks_args, global_params - - -def get_model_params(model_name, override_params): - """ Get the block args and global params for a given model """ - if model_name.startswith("efficientnet"): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet( - width_coefficient=w, depth_coefficient=d, dropout_rate=p - ) - else: - raise NotImplementedError("model name is not pre-defined: %s" % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters, global_params): - """ Calculate and round number of filters based on depth multiplier. """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats, global_params): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ - - @staticmethod - def _decode_block_string(block_string): - """ Gets a block through a string notation of arguments. """ - assert isinstance(block_string, str) - - ops = block_string.split("_") - options = {} - for op in ops: - splits = re.split(r"(\d.*)", op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - cond_1 = "s" in options and len(options["s"]) == 1 - cond_2 = (len(options["s"]) == 2) and (options["s"][0] == options["s"][1]) - assert cond_1 or cond_2 - - return BlockArgs( - kernel_size=int(options["k"]), - num_repeat=int(options["r"]), - input_filters=int(options["i"]), - output_filters=int(options["o"]), - expand_ratio=int(options["e"]), - id_skip=("noskip" not in block_string), - se_ratio=float(options["se"]) if "se" in options else None, - stride=[int(options["s"][0])], - ) - - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - "r%d" % block.num_repeat, - "k%d" % block.kernel_size, - "s%d%d" % (block.strides[0], block.strides[1]), - "e%s" % block.expand_ratio, - "i%d" % block.input_filters, - "o%d" % block.output_filters, - ] - if 0 < block.se_ratio <= 1: - args.append("se%s" % block.se_ratio) - if block.id_skip is False: - args.append("noskip") - return "_".join(args) - - @staticmethod - def decode(string_list): - """ - Decode a list of string notations to specify blocks in the network. 
- string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args): - """ - Encodes a list of BlockArgs to a list of strings. - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name, use_bias=False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name="batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name="fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size, stride, filter_size, dilation=1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = { - "b0_small": [224, 112, 112, 56, 28, 14, 14, 7], - "b0": [224, 112, 112, 56, 28, 14, 14, 7], - "b1": [240, 120, 120, 60, 30, 15, 15, 8], - "b2": [260, 130, 130, 65, 33, 17, 17, 9], - "b3": [300, 150, 150, 75, 38, 19, 19, 10], - "b4": [380, 190, 190, 95, 48, 24, 24, 12], - "b5": [456, 228, 228, 114, 57, 29, 29, 15], - "b6": [528, 264, 264, 132, 66, 33, 33, 17], - "b7": [600, 300, 300, 150, 75, 38, 38, 19], -} - - -def _drop_connect(inputs, prob, is_test): - if is_test: - output = inputs - else: - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = paddle.multiply(inputs, binary_tensor) / keep_prob - return output - - -class Conv2ds(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - filter_size, - stride=1, - padding=0, - groups=None, - name="conv2d", - act=None, - use_bias=False, - padding_type=None, - model_name=None, - cur_stage=None, - ): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = ( - 1 - if model_name == None and cur_stage == None - else inp_shape[model_name][cur_stage] - ) - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, 
width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2D( - input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - # act=act, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr, - ) - - def forward(self, inputs): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - input_channels, - filter_size, - output_channels, - stride=1, - num_groups=1, - padding_type="SAME", - conv_act=None, - bn_act="swish", - use_bn=True, - use_bias=False, - name=None, - conv_name=None, - bn_name=None, - model_name=None, - cur_stage=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds( - input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage, - ) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm( - num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr, - ) - - def forward(self, inputs): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - def __init__( - self, - input_channels, - block_args, - padding_type, - name=None, - model_name=None, - cur_stage=None, - ): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer( - input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage, - ) - - def forward(self, inputs): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - def __init__( - self, - input_channels, - block_args, - padding_type, - name=None, - model_name=None, - cur_stage=None, - ): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer( - input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage, - ) - - def forward(self, inputs): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - def __init__( - self, - input_channels, - block_args, - padding_type, - name=None, - model_name=None, - cur_stage=None, - ): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer( - input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + 
"_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage, - ) - - def forward(self, inputs): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - def __init__( - self, - input_channels, - num_squeezed_channels, - oup, - padding_type, - name=None, - model_name=None, - cur_stage=None, - ): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2D(1) - self._conv1 = Conv2ds( - input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce", - ) - - self._conv2 = Conv2ds( - num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand", - ) - - def forward(self, inputs): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - out = paddle.multiply(inputs, x) - return out - - -class MbConvBlock(nn.Layer): - def __init__( - self, - input_channels, - block_args, - padding_type, - use_se, - name=None, - drop_connect_rate=None, - model_name=None, - cur_stage=None, - ): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = ( - use_se - and (block_args.se_ratio is not None) - and (0 < block_args.se_ratio <= 1) - ) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm( - input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage, - ) - - self._dcn = DepthwiseConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage, - ) - - if self.has_se: - num_squeezed_channels = max( - 1, int(block_args.input_filters * block_args.se_ratio) - ) - self._se = SEBlock( - input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage, - ) - - self._pcn = ProjectConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage, - ) - - def forward(self, inputs): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - - if ( - self.id_skip - and self.block_args.stride == 1 - and self.block_args.input_filters == self.block_args.output_filters - ): - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, not self.training) - x = paddle.add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - def __init__( - self, - input_channels, - padding_type, - _global_params, - name=None, - model_name=None, - cur_stage=None, - ): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer( - input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage, - ) - - def forward(self, inputs): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - def __init__( - self, - input_channels, - _block_args, - _global_params, - padding_type, - use_se, - model_name=None, - ): - super(ExtractFeatures, self).__init__() - - 
self._global_params = _global_params - - self._conv_stem = ConvStemNorm( - input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0, - ) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace( - input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params), - ) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params), - ) - - drop_connect_rate = self._global_params.drop_connect_rate - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage, - ), - ) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace( - input_filters=block_args.output_filters, stride=1 - ) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage, - ), - ) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -class EfficientNet(nn.Layer): - def __init__( - self, - name="b0", - padding_type="SAME", - override_params=None, - use_se=True, - class_dim=1000, - ): - super(EfficientNet, self).__init__() - - model_name = "efficientnet-" + name - self.name = name - self._block_args, self._global_params = get_model_params( - model_name, override_params - ) - self.padding_type = padding_type - self.use_se = use_se - - self._ef = ExtractFeatures( - 3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - model_name=self.name, - ) - - output_channels = round_filters(1280, self._global_params) - if name == "b0_small" or name == "b0" or name == "b1": - oup = 320 - elif name == "b2": - oup = 352 - elif name == "b3": - oup = 384 - elif name == "b4": - oup = 448 - elif name == "b5": - oup = 512 - elif name == "b6": - oup = 576 - elif name == "b7": - oup = 640 - self._conv = ConvBNLayer( - oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7, - ) - self._pool = AdaptiveAvgPool2D(1) - - if self._global_params.dropout_rate: - self._drop = Dropout( - p=self._global_params.dropout_rate, mode="upscale_in_train" - ) - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear( - output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr - ) - - def forward(self, inputs): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x - - -def EfficientNetB0_small( - padding_type="DYNAMIC", override_params=None, use_se=False, **args -): - model = EfficientNet( - name="b0", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB0(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b0", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB1(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b1", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB2(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b2", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB3(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b3", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB4(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b4", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB5(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b5", - padding_type=padding_type, - 
override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB6(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b6", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def EfficientNetB7(padding_type="SAME", override_params=None, use_se=True, **args): - model = EfficientNet( - name="b7", - padding_type=padding_type, - override_params=override_params, - use_se=use_se, - **args - ) - return model - - -def test_EfficientNetB0_small(): - load_paddle_module_and_check( - EfficientNetB0_small, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_ghostnet.py b/examples/x2oneflow/paddle2oneflow/models/test_ghostnet.py deleted file mode 100644 index d75554ae87a1fe0a01082997dd89dbfec060d2fb..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_ghostnet.py +++ /dev/null @@ -1,361 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import math -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear -from paddle.regularizer import L2Decay -from paddle.nn.initializer import Uniform, KaimingNormal - -__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - act="relu", - name=None, - ): - super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(initializer=KaimingNormal(), name=name + "_weights"), - bias_attr=False, - ) - bn_name = name + "_bn" - - self._batch_norm = BatchNorm( - num_channels=out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale", regularizer=L2Decay(0.0)), - bias_attr=ParamAttr(name=bn_name + "_offset", regularizer=L2Decay(0.0)), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class SEBlock(nn.Layer): - def __init__(self, num_channels, reduction_ratio=4, name=None): - super(SEBlock, self).__init__() - self.pool2d_gap = AdaptiveAvgPool2D(1) - self._num_channels = num_channels - stdv = 1.0 / math.sqrt(num_channels * 1.0) - med_ch = num_channels // reduction_ratio - self.squeeze = Linear( - num_channels, - med_ch, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_1_weights" - ), - bias_attr=ParamAttr(name=name + "_1_offset"), - ) - stdv = 1.0 / math.sqrt(med_ch * 1.0) - self.excitation = Linear( - med_ch, 
- num_channels, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_2_weights" - ), - bias_attr=ParamAttr(name=name + "_2_offset"), - ) - - def forward(self, inputs): - pool = self.pool2d_gap(inputs) - pool = paddle.squeeze(pool, axis=[2, 3]) - squeeze = self.squeeze(pool) - squeeze = F.relu(squeeze) - excitation = self.excitation(squeeze) - excitation = paddle.clip(x=excitation, min=0, max=1) - excitation = paddle.unsqueeze(excitation, axis=[2, 3]) - out = paddle.multiply(inputs, excitation) - return out - - -class GhostModule(nn.Layer): - def __init__( - self, - in_channels, - output_channels, - kernel_size=1, - ratio=2, - dw_size=3, - stride=1, - relu=True, - name=None, - ): - super(GhostModule, self).__init__() - init_channels = int(math.ceil(output_channels / ratio)) - new_channels = int(init_channels * (ratio - 1)) - self.primary_conv = ConvBNLayer( - in_channels=in_channels, - out_channels=init_channels, - kernel_size=kernel_size, - stride=stride, - groups=1, - act="relu" if relu else None, - name=name + "_primary_conv", - ) - self.cheap_operation = ConvBNLayer( - in_channels=init_channels, - out_channels=new_channels, - kernel_size=dw_size, - stride=1, - groups=init_channels, - act="relu" if relu else None, - name=name + "_cheap_operation", - ) - - def forward(self, inputs): - x = self.primary_conv(inputs) - y = self.cheap_operation(x) - out = paddle.concat([x, y], axis=1) - return out - - -class GhostBottleneck(nn.Layer): - def __init__( - self, - in_channels, - hidden_dim, - output_channels, - kernel_size, - stride, - use_se, - name=None, - ): - super(GhostBottleneck, self).__init__() - self._stride = stride - self._use_se = use_se - self._num_channels = in_channels - self._output_channels = output_channels - self.ghost_module_1 = GhostModule( - in_channels=in_channels, - output_channels=hidden_dim, - kernel_size=1, - stride=1, - relu=True, - name=name + "_ghost_module_1", - ) - if stride == 2: - self.depthwise_conv = ConvBNLayer( - in_channels=hidden_dim, - out_channels=hidden_dim, - kernel_size=kernel_size, - stride=stride, - groups=hidden_dim, - act=None, - name=name - + "_depthwise_depthwise", # looks strange due to an old typo, will be fixed later. - ) - if use_se: - self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se") - self.ghost_module_2 = GhostModule( - in_channels=hidden_dim, - output_channels=output_channels, - kernel_size=1, - relu=False, - name=name + "_ghost_module_2", - ) - if stride != 1 or in_channels != output_channels: - self.shortcut_depthwise = ConvBNLayer( - in_channels=in_channels, - out_channels=in_channels, - kernel_size=kernel_size, - stride=stride, - groups=in_channels, - act=None, - name=name - + "_shortcut_depthwise_depthwise", # looks strange due to an old typo, will be fixed later. 
- ) - self.shortcut_conv = ConvBNLayer( - in_channels=in_channels, - out_channels=output_channels, - kernel_size=1, - stride=1, - groups=1, - act=None, - name=name + "_shortcut_conv", - ) - - def forward(self, inputs): - x = self.ghost_module_1(inputs) - if self._stride == 2: - x = self.depthwise_conv(x) - if self._use_se: - x = self.se_block(x) - x = self.ghost_module_2(x) - if self._stride == 1 and self._num_channels == self._output_channels: - shortcut = inputs - else: - shortcut = self.shortcut_depthwise(inputs) - shortcut = self.shortcut_conv(shortcut) - return paddle.add(x=x, y=shortcut) - - -class GhostNet(nn.Layer): - def __init__(self, scale, class_dim=1000): - super(GhostNet, self).__init__() - self.cfgs = [ - # k, t, c, SE, s - [3, 16, 16, 0, 1], - [3, 48, 24, 0, 2], - [3, 72, 24, 0, 1], - [5, 72, 40, 1, 2], - [5, 120, 40, 1, 1], - [3, 240, 80, 0, 2], - [3, 200, 80, 0, 1], - [3, 184, 80, 0, 1], - [3, 184, 80, 0, 1], - [3, 480, 112, 1, 1], - [3, 672, 112, 1, 1], - [5, 672, 160, 1, 2], - [5, 960, 160, 0, 1], - [5, 960, 160, 1, 1], - [5, 960, 160, 0, 1], - [5, 960, 160, 1, 1], - ] - self.scale = scale - output_channels = int(self._make_divisible(16 * self.scale, 4)) - self.conv1 = ConvBNLayer( - in_channels=3, - out_channels=output_channels, - kernel_size=3, - stride=2, - groups=1, - act="relu", - name="conv1", - ) - # build inverted residual blocks - idx = 0 - self.ghost_bottleneck_list = [] - for k, exp_size, c, use_se, s in self.cfgs: - in_channels = output_channels - output_channels = int(self._make_divisible(c * self.scale, 4)) - hidden_dim = int(self._make_divisible(exp_size * self.scale, 4)) - ghost_bottleneck = self.add_sublayer( - name="_ghostbottleneck_" + str(idx), - sublayer=GhostBottleneck( - in_channels=in_channels, - hidden_dim=hidden_dim, - output_channels=output_channels, - kernel_size=k, - stride=s, - use_se=use_se, - name="_ghostbottleneck_" + str(idx), - ), - ) - self.ghost_bottleneck_list.append(ghost_bottleneck) - idx += 1 - # build last several layers - in_channels = output_channels - output_channels = int(self._make_divisible(exp_size * self.scale, 4)) - self.conv_last = ConvBNLayer( - in_channels=in_channels, - out_channels=output_channels, - kernel_size=1, - stride=1, - groups=1, - act="relu", - name="conv_last", - ) - self.pool2d_gap = AdaptiveAvgPool2D(1) - in_channels = output_channels - self._fc0_output_channels = 1280 - self.fc_0 = ConvBNLayer( - in_channels=in_channels, - out_channels=self._fc0_output_channels, - kernel_size=1, - stride=1, - act="relu", - name="fc_0", - ) - self.dropout = nn.Dropout(p=0.2) - stdv = 1.0 / math.sqrt(self._fc0_output_channels * 1.0) - self.fc_1 = Linear( - self._fc0_output_channels, - class_dim, - weight_attr=ParamAttr( - name="fc_1_weights", initializer=Uniform(-stdv, stdv) - ), - bias_attr=ParamAttr(name="fc_1_offset"), - ) - - def forward(self, inputs): - x = self.conv1(inputs) - for ghost_bottleneck in self.ghost_bottleneck_list: - x = ghost_bottleneck(x) - x = self.conv_last(x) - x = self.pool2d_gap(x) - x = self.fc_0(x) - x = self.dropout(x) - x = paddle.reshape(x, shape=[-1, self._fc0_output_channels]) - x = self.fc_1(x) - return x - - def _make_divisible(self, v, divisor, min_value=None): - """ - This function is taken from the original tf repo. 
- It ensures that all layers have a channel number that is divisible by 8 - It can be seen here: - https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py - """ - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -def GhostNet_x0_5(**args): - model = GhostNet(scale=0.5) - return model - - -def GhostNet_x1_0(**args): - model = GhostNet(scale=1.0) - return model - - -def GhostNet_x1_3(**args): - model = GhostNet(scale=1.3) - return model - - -def test_GhostNet_x0_5(): - load_paddle_module_and_check( - GhostNet_x0_5, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_googlenet.py b/examples/x2oneflow/paddle2oneflow/models/test_googlenet.py deleted file mode 100644 index 38c3ef2bca7e10222b032ddfc3eda31cc5505597..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_googlenet.py +++ /dev/null @@ -1,226 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform - -import math - -__all__ = ["GoogLeNet"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def xavier(channels, filter_size, name): - stdv = (3.0 / (filter_size ** 2 * channels)) ** 0.5 - param_attr = ParamAttr(initializer=Uniform(-stdv, stdv), name=name + "_weights") - return param_attr - - -class ConvLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - ): - super(ConvLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - - def forward(self, inputs): - y = self._conv(inputs) - return y - - -class Inception(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - filter1, - filter3R, - filter3, - filter5R, - filter5, - proj, - name=None, - ): - super(Inception, self).__init__() - - self._conv1 = ConvLayer( - input_channels, filter1, 1, name="inception_" + name + "_1x1" - ) - self._conv3r = ConvLayer( - input_channels, filter3R, 1, name="inception_" + name + "_3x3_reduce" - ) - self._conv3 = ConvLayer(filter3R, filter3, 3, name="inception_" + name + "_3x3") - self._conv5r = ConvLayer( - input_channels, filter5R, 1, name="inception_" + name + "_5x5_reduce" - ) - self._conv5 = ConvLayer(filter5R, filter5, 5, name="inception_" + name + "_5x5") - 
self._pool = MaxPool2D(kernel_size=3, stride=1, padding=1) - - self._convprj = ConvLayer( - input_channels, proj, 1, name="inception_" + name + "_3x3_proj" - ) - - def forward(self, inputs): - conv1 = self._conv1(inputs) - - conv3r = self._conv3r(inputs) - conv3 = self._conv3(conv3r) - - conv5r = self._conv5r(inputs) - conv5 = self._conv5(conv5r) - - pool = self._pool(inputs) - convprj = self._convprj(pool) - - cat = paddle.concat([conv1, conv3, conv5, convprj], axis=1) - cat = F.relu(cat) - return cat - - -class GoogLeNetDY(nn.Layer): - def __init__(self, class_dim=1000): - super(GoogLeNetDY, self).__init__() - self._conv = ConvLayer(3, 64, 7, 2, name="conv1") - self._pool = MaxPool2D(kernel_size=3, stride=2) - self._conv_1 = ConvLayer(64, 64, 1, name="conv2_1x1") - self._conv_2 = ConvLayer(64, 192, 3, name="conv2_3x3") - - self._ince3a = Inception(192, 192, 64, 96, 128, 16, 32, 32, name="ince3a") - self._ince3b = Inception(256, 256, 128, 128, 192, 32, 96, 64, name="ince3b") - - self._ince4a = Inception(480, 480, 192, 96, 208, 16, 48, 64, name="ince4a") - self._ince4b = Inception(512, 512, 160, 112, 224, 24, 64, 64, name="ince4b") - self._ince4c = Inception(512, 512, 128, 128, 256, 24, 64, 64, name="ince4c") - self._ince4d = Inception(512, 512, 112, 144, 288, 32, 64, 64, name="ince4d") - self._ince4e = Inception(528, 528, 256, 160, 320, 32, 128, 128, name="ince4e") - - self._ince5a = Inception(832, 832, 256, 160, 320, 32, 128, 128, name="ince5a") - self._ince5b = Inception(832, 832, 384, 192, 384, 48, 128, 128, name="ince5b") - - self._pool_5 = AvgPool2D(kernel_size=7, stride=7) - - self._drop = Dropout(p=0.4, mode="downscale_in_infer") - self._fc_out = Linear( - 1024, - class_dim, - weight_attr=xavier(1024, 1, "out"), - bias_attr=ParamAttr(name="out_offset"), - ) - self._pool_o1 = AvgPool2D(kernel_size=5, stride=3) - self._conv_o1 = ConvLayer(512, 128, 1, name="conv_o1") - self._fc_o1 = Linear( - 1152, - 1024, - weight_attr=xavier(2048, 1, "fc_o1"), - bias_attr=ParamAttr(name="fc_o1_offset"), - ) - self._drop_o1 = Dropout(p=0.7, mode="downscale_in_infer") - self._out1 = Linear( - 1024, - class_dim, - weight_attr=xavier(1024, 1, "out1"), - bias_attr=ParamAttr(name="out1_offset"), - ) - self._pool_o2 = AvgPool2D(kernel_size=5, stride=3) - self._conv_o2 = ConvLayer(528, 128, 1, name="conv_o2") - self._fc_o2 = Linear( - 1152, - 1024, - weight_attr=xavier(2048, 1, "fc_o2"), - bias_attr=ParamAttr(name="fc_o2_offset"), - ) - self._drop_o2 = Dropout(p=0.7, mode="downscale_in_infer") - self._out2 = Linear( - 1024, - class_dim, - weight_attr=xavier(1024, 1, "out2"), - bias_attr=ParamAttr(name="out2_offset"), - ) - - def forward(self, inputs): - x = self._conv(inputs) - x = self._pool(x) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._pool(x) - - x = self._ince3a(x) - x = self._ince3b(x) - x = self._pool(x) - - ince4a = self._ince4a(x) - x = self._ince4b(ince4a) - x = self._ince4c(x) - ince4d = self._ince4d(x) - x = self._ince4e(ince4d) - x = self._pool(x) - - x = self._ince5a(x) - ince5b = self._ince5b(x) - - x = self._pool_5(ince5b) - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - out = self._fc_out(x) - - # x = self._pool_o1(ince4a) - # x = self._conv_o1(x) - # x = paddle.flatten(x, start_axis=1, stop_axis=-1) - # x = self._fc_o1(x) - # x = F.relu(x) - # x = self._drop_o1(x) - # out1 = self._out1(x) - - # x = self._pool_o2(ince4d) - # x = self._conv_o2(x) - # x = paddle.flatten(x, start_axis=1, stop_axis=-1) - # x = self._fc_o2(x) - # x = self._drop_o2(x) - # out2 = 
self._out2(x) - return out - - -def GoogLeNet(**args): - model = GoogLeNetDY(**args) - return model - - -def test_GoogLeNet(): - load_paddle_module_and_check( - GoogLeNet, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_inceptionv3.py b/examples/x2oneflow/paddle2oneflow/models/test_inceptionv3.py deleted file mode 100644 index 2c9ecff7b12f45ace61db78a4d71c3f766e642e0..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_inceptionv3.py +++ /dev/null @@ -1,598 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = ["InceptionV3"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - padding=0, - groups=1, - act="relu", - name=None, - ): - super(ConvBNLayer, self).__init__() - - self.conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - self.batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name=name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, inputs): - y = self.conv(inputs) - y = self.batch_norm(y) - return y - - -class InceptionStem(nn.Layer): - def __init__(self): - super(InceptionStem, self).__init__() - self.conv_1a_3x3 = ConvBNLayer( - num_channels=3, - num_filters=32, - filter_size=3, - stride=2, - act="relu", - name="conv_1a_3x3", - ) - self.conv_2a_3x3 = ConvBNLayer( - num_channels=32, - num_filters=32, - filter_size=3, - stride=1, - act="relu", - name="conv_2a_3x3", - ) - self.conv_2b_3x3 = ConvBNLayer( - num_channels=32, - num_filters=64, - filter_size=3, - padding=1, - act="relu", - name="conv_2b_3x3", - ) - self.maxpool = MaxPool2D(kernel_size=3, stride=2, padding=0) - self.conv_3b_1x1 = ConvBNLayer( - num_channels=64, - num_filters=80, - filter_size=1, - act="relu", - name="conv_3b_1x1", - ) - self.conv_4a_3x3 = ConvBNLayer( - num_channels=80, - num_filters=192, - filter_size=3, - act="relu", - name="conv_4a_3x3", - ) - - def forward(self, x): - y = self.conv_1a_3x3(x) - y = self.conv_2a_3x3(y) - y = self.conv_2b_3x3(y) - y = self.maxpool(y) - y = self.conv_3b_1x1(y) - y = self.conv_4a_3x3(y) - y = self.maxpool(y) - return y - - -class 
InceptionA(nn.Layer): - def __init__(self, num_channels, pool_features, name=None): - super(InceptionA, self).__init__() - self.branch1x1 = ConvBNLayer( - num_channels=num_channels, - num_filters=64, - filter_size=1, - act="relu", - name="inception_a_branch1x1_" + name, - ) - self.branch5x5_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=48, - filter_size=1, - act="relu", - name="inception_a_branch5x5_1_" + name, - ) - self.branch5x5_2 = ConvBNLayer( - num_channels=48, - num_filters=64, - filter_size=5, - padding=2, - act="relu", - name="inception_a_branch5x5_2_" + name, - ) - - self.branch3x3dbl_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=64, - filter_size=1, - act="relu", - name="inception_a_branch3x3dbl_1_" + name, - ) - self.branch3x3dbl_2 = ConvBNLayer( - num_channels=64, - num_filters=96, - filter_size=3, - padding=1, - act="relu", - name="inception_a_branch3x3dbl_2_" + name, - ) - self.branch3x3dbl_3 = ConvBNLayer( - num_channels=96, - num_filters=96, - filter_size=3, - padding=1, - act="relu", - name="inception_a_branch3x3dbl_3_" + name, - ) - self.branch_pool = AvgPool2D( - kernel_size=3, stride=1, padding=1, exclusive=False - ) - self.branch_pool_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=pool_features, - filter_size=1, - act="relu", - name="inception_a_branch_pool_" + name, - ) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - branch5x5 = self.branch5x5_1(x) - branch5x5 = self.branch5x5_2(branch5x5) - - branch3x3dbl = self.branch3x3dbl_1(x) - branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) - branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) - - branch_pool = self.branch_pool(x) - - branch_pool = self.branch_pool_conv(branch_pool) - outputs = paddle.concat( - [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=1 - ) - return outputs - - -class InceptionB(nn.Layer): - def __init__(self, num_channels, name=None): - super(InceptionB, self).__init__() - self.branch3x3 = ConvBNLayer( - num_channels=num_channels, - num_filters=384, - filter_size=3, - stride=2, - act="relu", - name="inception_b_branch3x3_" + name, - ) - self.branch3x3dbl_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=64, - filter_size=1, - act="relu", - name="inception_b_branch3x3dbl_1_" + name, - ) - self.branch3x3dbl_2 = ConvBNLayer( - num_channels=64, - num_filters=96, - filter_size=3, - padding=1, - act="relu", - name="inception_b_branch3x3dbl_2_" + name, - ) - self.branch3x3dbl_3 = ConvBNLayer( - num_channels=96, - num_filters=96, - filter_size=3, - stride=2, - act="relu", - name="inception_b_branch3x3dbl_3_" + name, - ) - self.branch_pool = MaxPool2D(kernel_size=3, stride=2) - - def forward(self, x): - branch3x3 = self.branch3x3(x) - - branch3x3dbl = self.branch3x3dbl_1(x) - branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) - branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) - - branch_pool = self.branch_pool(x) - - outputs = paddle.concat([branch3x3, branch3x3dbl, branch_pool], axis=1) - - return outputs - - -class InceptionC(nn.Layer): - def __init__(self, num_channels, channels_7x7, name=None): - super(InceptionC, self).__init__() - self.branch1x1 = ConvBNLayer( - num_channels=num_channels, - num_filters=192, - filter_size=1, - act="relu", - name="inception_c_branch1x1_" + name, - ) - self.branch7x7_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=channels_7x7, - filter_size=1, - stride=1, - act="relu", - name="inception_c_branch7x7_1_" + name, - ) - self.branch7x7_2 = ConvBNLayer( - num_channels=channels_7x7, - 
num_filters=channels_7x7, - filter_size=(1, 7), - stride=1, - padding=(0, 3), - act="relu", - name="inception_c_branch7x7_2_" + name, - ) - self.branch7x7_3 = ConvBNLayer( - num_channels=channels_7x7, - num_filters=192, - filter_size=(7, 1), - stride=1, - padding=(3, 0), - act="relu", - name="inception_c_branch7x7_3_" + name, - ) - - self.branch7x7dbl_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=channels_7x7, - filter_size=1, - act="relu", - name="inception_c_branch7x7dbl_1_" + name, - ) - self.branch7x7dbl_2 = ConvBNLayer( - num_channels=channels_7x7, - num_filters=channels_7x7, - filter_size=(7, 1), - padding=(3, 0), - act="relu", - name="inception_c_branch7x7dbl_2_" + name, - ) - self.branch7x7dbl_3 = ConvBNLayer( - num_channels=channels_7x7, - num_filters=channels_7x7, - filter_size=(1, 7), - padding=(0, 3), - act="relu", - name="inception_c_branch7x7dbl_3_" + name, - ) - self.branch7x7dbl_4 = ConvBNLayer( - num_channels=channels_7x7, - num_filters=channels_7x7, - filter_size=(7, 1), - padding=(3, 0), - act="relu", - name="inception_c_branch7x7dbl_4_" + name, - ) - self.branch7x7dbl_5 = ConvBNLayer( - num_channels=channels_7x7, - num_filters=192, - filter_size=(1, 7), - padding=(0, 3), - act="relu", - name="inception_c_branch7x7dbl_5_" + name, - ) - - self.branch_pool = AvgPool2D( - kernel_size=3, stride=1, padding=1, exclusive=False - ) - self.branch_pool_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=192, - filter_size=1, - act="relu", - name="inception_c_branch_pool_" + name, - ) - - def forward(self, x): - branch1x1 = self.branch1x1(x) - - branch7x7 = self.branch7x7_1(x) - branch7x7 = self.branch7x7_2(branch7x7) - branch7x7 = self.branch7x7_3(branch7x7) - - branch7x7dbl = self.branch7x7dbl_1(x) - branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) - branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) - - branch_pool = self.branch_pool(x) - branch_pool = self.branch_pool_conv(branch_pool) - - outputs = paddle.concat( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=1 - ) - - return outputs - - -class InceptionD(nn.Layer): - def __init__(self, num_channels, name=None): - super(InceptionD, self).__init__() - self.branch3x3_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=192, - filter_size=1, - act="relu", - name="inception_d_branch3x3_1_" + name, - ) - self.branch3x3_2 = ConvBNLayer( - num_channels=192, - num_filters=320, - filter_size=3, - stride=2, - act="relu", - name="inception_d_branch3x3_2_" + name, - ) - self.branch7x7x3_1 = ConvBNLayer( - num_channels=num_channels, - num_filters=192, - filter_size=1, - act="relu", - name="inception_d_branch7x7x3_1_" + name, - ) - self.branch7x7x3_2 = ConvBNLayer( - num_channels=192, - num_filters=192, - filter_size=(1, 7), - padding=(0, 3), - act="relu", - name="inception_d_branch7x7x3_2_" + name, - ) - self.branch7x7x3_3 = ConvBNLayer( - num_channels=192, - num_filters=192, - filter_size=(7, 1), - padding=(3, 0), - act="relu", - name="inception_d_branch7x7x3_3_" + name, - ) - self.branch7x7x3_4 = ConvBNLayer( - num_channels=192, - num_filters=192, - filter_size=3, - stride=2, - act="relu", - name="inception_d_branch7x7x3_4_" + name, - ) - self.branch_pool = MaxPool2D(kernel_size=3, stride=2) - - def forward(self, x): - branch3x3 = self.branch3x3_1(x) - branch3x3 = self.branch3x3_2(branch3x3) - - branch7x7x3 = self.branch7x7x3_1(x) - branch7x7x3 = self.branch7x7x3_2(branch7x7x3) - branch7x7x3 = 
self.branch7x7x3_3(branch7x7x3)
-        branch7x7x3 = self.branch7x7x3_4(branch7x7x3)
-
-        branch_pool = self.branch_pool(x)
-
-        outputs = paddle.concat([branch3x3, branch7x7x3, branch_pool], axis=1)
-        return outputs
-
-
-class InceptionE(nn.Layer):
-    def __init__(self, num_channels, name=None):
-        super(InceptionE, self).__init__()
-        self.branch1x1 = ConvBNLayer(
-            num_channels=num_channels,
-            num_filters=320,
-            filter_size=1,
-            act="relu",
-            name="inception_e_branch1x1_" + name,
-        )
-        self.branch3x3_1 = ConvBNLayer(
-            num_channels=num_channels,
-            num_filters=384,
-            filter_size=1,
-            act="relu",
-            name="inception_e_branch3x3_1_" + name,
-        )
-        self.branch3x3_2a = ConvBNLayer(
-            num_channels=384,
-            num_filters=384,
-            filter_size=(1, 3),
-            padding=(0, 1),
-            act="relu",
-            name="inception_e_branch3x3_2a_" + name,
-        )
-        self.branch3x3_2b = ConvBNLayer(
-            num_channels=384,
-            num_filters=384,
-            filter_size=(3, 1),
-            padding=(1, 0),
-            act="relu",
-            name="inception_e_branch3x3_2b_" + name,
-        )
-
-        self.branch3x3dbl_1 = ConvBNLayer(
-            num_channels=num_channels,
-            num_filters=448,
-            filter_size=1,
-            act="relu",
-            name="inception_e_branch3x3dbl_1_" + name,
-        )
-        self.branch3x3dbl_2 = ConvBNLayer(
-            num_channels=448,
-            num_filters=384,
-            filter_size=3,
-            padding=1,
-            act="relu",
-            name="inception_e_branch3x3dbl_2_" + name,
-        )
-        self.branch3x3dbl_3a = ConvBNLayer(
-            num_channels=384,
-            num_filters=384,
-            filter_size=(1, 3),
-            padding=(0, 1),
-            act="relu",
-            name="inception_e_branch3x3dbl_3a_" + name,
-        )
-        self.branch3x3dbl_3b = ConvBNLayer(
-            num_channels=384,
-            num_filters=384,
-            filter_size=(3, 1),
-            padding=(1, 0),
-            act="relu",
-            name="inception_e_branch3x3dbl_3b_" + name,
-        )
-        self.branch_pool = AvgPool2D(
-            kernel_size=3, stride=1, padding=1, exclusive=False
-        )
-        self.branch_pool_conv = ConvBNLayer(
-            num_channels=num_channels,
-            num_filters=192,
-            filter_size=1,
-            act="relu",
-            name="inception_e_branch_pool_" + name,
-        )
-
-    def forward(self, x):
-        branch1x1 = self.branch1x1(x)
-
-        branch3x3 = self.branch3x3_1(x)
-        branch3x3 = [
-            self.branch3x3_2a(branch3x3),
-            self.branch3x3_2b(branch3x3),
-        ]
-        branch3x3 = paddle.concat(branch3x3, axis=1)
-
-        branch3x3dbl = self.branch3x3dbl_1(x)
-        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
-        branch3x3dbl = [
-            self.branch3x3dbl_3a(branch3x3dbl),
-            self.branch3x3dbl_3b(branch3x3dbl),
-        ]
-        branch3x3dbl = paddle.concat(branch3x3dbl, axis=1)
-
-        branch_pool = self.branch_pool(x)
-        branch_pool = self.branch_pool_conv(branch_pool)
-
-        outputs = paddle.concat(
-            [branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=1
-        )
-        return outputs
-
-
-class InceptionV3(nn.Layer):
-    def __init__(self, class_dim=1000):
-        super(InceptionV3, self).__init__()
-        self.inception_a_list = [[192, 256, 288], [32, 64, 64]]
-        self.inception_c_list = [[768, 768, 768, 768], [128, 160, 160, 192]]
-
-        self.inception_stem = InceptionStem()
-        self.inception_block_list = []
-        for i in range(len(self.inception_a_list[0])):
-            inception_a = self.add_sublayer(
-                "inception_a_" + str(i + 1),
-                InceptionA(
-                    self.inception_a_list[0][i],
-                    self.inception_a_list[1][i],
-                    name=str(i + 1),
-                ),
-            )
-            self.inception_block_list.append(inception_a)
-        inception_b = self.add_sublayer("inception_b_1", InceptionB(288, name="1"))
-        self.inception_block_list.append(inception_b)
-
-        for i in range(len(self.inception_c_list[0])):
-            inception_c = self.add_sublayer(
-                "inception_c_" + str(i + 1),
-                InceptionC(
-                    self.inception_c_list[0][i],
-                    self.inception_c_list[1][i],
-                    name=str(i + 1),
-                ),
-            )
-            
self.inception_block_list.append(inception_c) - inception_d = self.add_sublayer("inception_d_1", InceptionD(768, name="1")) - self.inception_block_list.append(inception_d) - inception_e = self.add_sublayer("inception_e_1", InceptionE(1280, name="1")) - self.inception_block_list.append(inception_e) - inception_e = self.add_sublayer("inception_e_2", InceptionE(2048, name="2")) - self.inception_block_list.append(inception_e) - - self.gap = AdaptiveAvgPool2D(1) - self.drop = Dropout(p=0.2, mode="downscale_in_infer") - stdv = 1.0 / math.sqrt(2048 * 1.0) - self.out = Linear( - 2048, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, x): - y = self.inception_stem(x) - for inception_block in self.inception_block_list: - y = inception_block(y) - y = self.gap(y) - y = paddle.reshape(y, shape=[-1, 2048]) - y = self.drop(y) - y = self.out(y) - return y - - -def test_InceptionV3(): - load_paddle_module_and_check( - InceptionV3, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_inceptionv4.py b/examples/x2oneflow/paddle2oneflow/models/test_inceptionv4.py deleted file mode 100644 index 3e5b2326b5457ccde33f0f5d619bb6bcb94cd148..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_inceptionv4.py +++ /dev/null @@ -1,481 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = ["InceptionV4"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - padding=0, - groups=1, - act="relu", - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - bn_name = name + "_bn" - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class InceptionStem(nn.Layer): - def __init__(self): - super(InceptionStem, self).__init__() - self._conv_1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name="conv1_3x3_s2") - self._conv_2 = ConvBNLayer(32, 32, 3, act="relu", name="conv2_3x3_s1") - self._conv_3 = ConvBNLayer( - 32, 64, 3, padding=1, act="relu", name="conv3_3x3_s1" - ) - self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) - self._conv2 = ConvBNLayer( - 64, 96, 3, stride=2, act="relu", name="inception_stem1_3x3_s2" - ) - self._conv1_1 = ConvBNLayer( - 160, 64, 1, act="relu", name="inception_stem2_3x3_reduce" - ) - self._conv1_2 = ConvBNLayer(64, 96, 3, act="relu", name="inception_stem2_3x3") - self._conv2_1 = ConvBNLayer( - 160, 64, 1, act="relu", name="inception_stem2_1x7_reduce" - ) - self._conv2_2 = ConvBNLayer( - 64, 64, (7, 1), padding=(3, 0), act="relu", name="inception_stem2_1x7" - ) - self._conv2_3 = ConvBNLayer( - 64, 64, (1, 7), padding=(0, 3), act="relu", name="inception_stem2_7x1" - ) - self._conv2_4 = ConvBNLayer(64, 96, 3, act="relu", name="inception_stem2_3x3_2") - self._conv3 = ConvBNLayer( - 192, 192, 3, stride=2, act="relu", name="inception_stem3_3x3_s2" - ) - - def forward(self, inputs): - conv = self._conv_1(inputs) - conv = self._conv_2(conv) - conv = self._conv_3(conv) - - pool1 = self._pool(conv) - conv2 = self._conv2(conv) - concat = paddle.concat([pool1, conv2], axis=1) - - conv1 = self._conv1_1(concat) - conv1 = self._conv1_2(conv1) - - conv2 = self._conv2_1(concat) - conv2 = self._conv2_2(conv2) - conv2 = self._conv2_3(conv2) - conv2 = self._conv2_4(conv2) - - concat = paddle.concat([conv1, conv2], axis=1) - - conv1 = self._conv3(concat) - pool1 = self._pool(concat) - - concat = paddle.concat([conv1, pool1], axis=1) - return concat - - -class InceptionA(nn.Layer): - def __init__(self, name): - super(InceptionA, self).__init__() - self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1) - self._conv1 = ConvBNLayer( - 384, 96, 1, act="relu", name="inception_a" + name + "_1x1" - ) - self._conv2 = ConvBNLayer( - 384, 96, 1, act="relu", name="inception_a" + name + "_1x1_2" - ) - self._conv3_1 = ConvBNLayer( - 384, 64, 1, act="relu", name="inception_a" + name + "_3x3_reduce" - ) - self._conv3_2 = ConvBNLayer( - 64, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3" - ) - self._conv4_1 = ConvBNLayer( - 384, 
64, 1, act="relu", name="inception_a" + name + "_3x3_2_reduce" - ) - self._conv4_2 = ConvBNLayer( - 64, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3_2" - ) - self._conv4_3 = ConvBNLayer( - 96, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3_3" - ) - - def forward(self, inputs): - pool1 = self._pool(inputs) - conv1 = self._conv1(pool1) - - conv2 = self._conv2(inputs) - - conv3 = self._conv3_1(inputs) - conv3 = self._conv3_2(conv3) - - conv4 = self._conv4_1(inputs) - conv4 = self._conv4_2(conv4) - conv4 = self._conv4_3(conv4) - - concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1) - return concat - - -class ReductionA(nn.Layer): - def __init__(self): - super(ReductionA, self).__init__() - self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) - self._conv2 = ConvBNLayer( - 384, 384, 3, stride=2, act="relu", name="reduction_a_3x3" - ) - self._conv3_1 = ConvBNLayer( - 384, 192, 1, act="relu", name="reduction_a_3x3_2_reduce" - ) - self._conv3_2 = ConvBNLayer( - 192, 224, 3, padding=1, act="relu", name="reduction_a_3x3_2" - ) - self._conv3_3 = ConvBNLayer( - 224, 256, 3, stride=2, act="relu", name="reduction_a_3x3_3" - ) - - def forward(self, inputs): - pool1 = self._pool(inputs) - conv2 = self._conv2(inputs) - conv3 = self._conv3_1(inputs) - conv3 = self._conv3_2(conv3) - conv3 = self._conv3_3(conv3) - concat = paddle.concat([pool1, conv2, conv3], axis=1) - return concat - - -class InceptionB(nn.Layer): - def __init__(self, name=None): - super(InceptionB, self).__init__() - self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1) - self._conv1 = ConvBNLayer( - 1024, 128, 1, act="relu", name="inception_b" + name + "_1x1" - ) - self._conv2 = ConvBNLayer( - 1024, 384, 1, act="relu", name="inception_b" + name + "_1x1_2" - ) - self._conv3_1 = ConvBNLayer( - 1024, 192, 1, act="relu", name="inception_b" + name + "_1x7_reduce" - ) - self._conv3_2 = ConvBNLayer( - 192, - 224, - (1, 7), - padding=(0, 3), - act="relu", - name="inception_b" + name + "_1x7", - ) - self._conv3_3 = ConvBNLayer( - 224, - 256, - (7, 1), - padding=(3, 0), - act="relu", - name="inception_b" + name + "_7x1", - ) - self._conv4_1 = ConvBNLayer( - 1024, 192, 1, act="relu", name="inception_b" + name + "_7x1_2_reduce" - ) - self._conv4_2 = ConvBNLayer( - 192, - 192, - (1, 7), - padding=(0, 3), - act="relu", - name="inception_b" + name + "_1x7_2", - ) - self._conv4_3 = ConvBNLayer( - 192, - 224, - (7, 1), - padding=(3, 0), - act="relu", - name="inception_b" + name + "_7x1_2", - ) - self._conv4_4 = ConvBNLayer( - 224, - 224, - (1, 7), - padding=(0, 3), - act="relu", - name="inception_b" + name + "_1x7_3", - ) - self._conv4_5 = ConvBNLayer( - 224, - 256, - (7, 1), - padding=(3, 0), - act="relu", - name="inception_b" + name + "_7x1_3", - ) - - def forward(self, inputs): - pool1 = self._pool(inputs) - conv1 = self._conv1(pool1) - - conv2 = self._conv2(inputs) - - conv3 = self._conv3_1(inputs) - conv3 = self._conv3_2(conv3) - conv3 = self._conv3_3(conv3) - - conv4 = self._conv4_1(inputs) - conv4 = self._conv4_2(conv4) - conv4 = self._conv4_3(conv4) - conv4 = self._conv4_4(conv4) - conv4 = self._conv4_5(conv4) - - concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1) - return concat - - -class ReductionB(nn.Layer): - def __init__(self): - super(ReductionB, self).__init__() - self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) - self._conv2_1 = ConvBNLayer( - 1024, 192, 1, act="relu", name="reduction_b_3x3_reduce" - ) - self._conv2_2 = ConvBNLayer( - 192, 192, 3, stride=2, 
act="relu", name="reduction_b_3x3" - ) - self._conv3_1 = ConvBNLayer( - 1024, 256, 1, act="relu", name="reduction_b_1x7_reduce" - ) - self._conv3_2 = ConvBNLayer( - 256, 256, (1, 7), padding=(0, 3), act="relu", name="reduction_b_1x7" - ) - self._conv3_3 = ConvBNLayer( - 256, 320, (7, 1), padding=(3, 0), act="relu", name="reduction_b_7x1" - ) - self._conv3_4 = ConvBNLayer( - 320, 320, 3, stride=2, act="relu", name="reduction_b_3x3_2" - ) - - def forward(self, inputs): - pool1 = self._pool(inputs) - - conv2 = self._conv2_1(inputs) - conv2 = self._conv2_2(conv2) - - conv3 = self._conv3_1(inputs) - conv3 = self._conv3_2(conv3) - conv3 = self._conv3_3(conv3) - conv3 = self._conv3_4(conv3) - - concat = paddle.concat([pool1, conv2, conv3], axis=1) - - return concat - - -class InceptionC(nn.Layer): - def __init__(self, name=None): - super(InceptionC, self).__init__() - self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1) - self._conv1 = ConvBNLayer( - 1536, 256, 1, act="relu", name="inception_c" + name + "_1x1" - ) - self._conv2 = ConvBNLayer( - 1536, 256, 1, act="relu", name="inception_c" + name + "_1x1_2" - ) - self._conv3_0 = ConvBNLayer( - 1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_3" - ) - self._conv3_1 = ConvBNLayer( - 384, - 256, - (1, 3), - padding=(0, 1), - act="relu", - name="inception_c" + name + "_1x3", - ) - self._conv3_2 = ConvBNLayer( - 384, - 256, - (3, 1), - padding=(1, 0), - act="relu", - name="inception_c" + name + "_3x1", - ) - self._conv4_0 = ConvBNLayer( - 1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_4" - ) - self._conv4_00 = ConvBNLayer( - 384, - 448, - (1, 3), - padding=(0, 1), - act="relu", - name="inception_c" + name + "_1x3_2", - ) - self._conv4_000 = ConvBNLayer( - 448, - 512, - (3, 1), - padding=(1, 0), - act="relu", - name="inception_c" + name + "_3x1_2", - ) - self._conv4_1 = ConvBNLayer( - 512, - 256, - (1, 3), - padding=(0, 1), - act="relu", - name="inception_c" + name + "_1x3_3", - ) - self._conv4_2 = ConvBNLayer( - 512, - 256, - (3, 1), - padding=(1, 0), - act="relu", - name="inception_c" + name + "_3x1_3", - ) - - def forward(self, inputs): - pool1 = self._pool(inputs) - conv1 = self._conv1(pool1) - - conv2 = self._conv2(inputs) - - conv3 = self._conv3_0(inputs) - conv3_1 = self._conv3_1(conv3) - conv3_2 = self._conv3_2(conv3) - - conv4 = self._conv4_0(inputs) - conv4 = self._conv4_00(conv4) - conv4 = self._conv4_000(conv4) - conv4_1 = self._conv4_1(conv4) - conv4_2 = self._conv4_2(conv4) - - concat = paddle.concat( - [conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], axis=1 - ) - - return concat - - -class InceptionV4DY(nn.Layer): - def __init__(self, class_dim=1000): - super(InceptionV4DY, self).__init__() - self._inception_stem = InceptionStem() - - self._inceptionA_1 = InceptionA(name="1") - self._inceptionA_2 = InceptionA(name="2") - self._inceptionA_3 = InceptionA(name="3") - self._inceptionA_4 = InceptionA(name="4") - self._reductionA = ReductionA() - - self._inceptionB_1 = InceptionB(name="1") - self._inceptionB_2 = InceptionB(name="2") - self._inceptionB_3 = InceptionB(name="3") - self._inceptionB_4 = InceptionB(name="4") - self._inceptionB_5 = InceptionB(name="5") - self._inceptionB_6 = InceptionB(name="6") - self._inceptionB_7 = InceptionB(name="7") - self._reductionB = ReductionB() - - self._inceptionC_1 = InceptionC(name="1") - self._inceptionC_2 = InceptionC(name="2") - self._inceptionC_3 = InceptionC(name="3") - - self.avg_pool = AdaptiveAvgPool2D(1) - self._drop = Dropout(p=0.2, mode="downscale_in_infer") - 
stdv = 1.0 / math.sqrt(1536 * 1.0) - self.out = Linear( - 1536, - class_dim, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name="final_fc_weights" - ), - bias_attr=ParamAttr(name="final_fc_offset"), - ) - - def forward(self, inputs): - x = self._inception_stem(inputs) - - x = self._inceptionA_1(x) - x = self._inceptionA_2(x) - x = self._inceptionA_3(x) - x = self._inceptionA_4(x) - x = self._reductionA(x) - - x = self._inceptionB_1(x) - x = self._inceptionB_2(x) - x = self._inceptionB_3(x) - x = self._inceptionB_4(x) - x = self._inceptionB_5(x) - x = self._inceptionB_6(x) - x = self._inceptionB_7(x) - x = self._reductionB(x) - - x = self._inceptionC_1(x) - x = self._inceptionC_2(x) - x = self._inceptionC_3(x) - - x = self.avg_pool(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._drop(x) - x = self.out(x) - return x - - -def InceptionV4(**args): - model = InceptionV4DY(**args) - return model - - -def test_InceptionV4(): - load_paddle_module_and_check( - InceptionV4, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_mobilenetv1.py b/examples/x2oneflow/paddle2oneflow/models/test_mobilenetv1.py deleted file mode 100644 index f5e217913a4ea3276c74a02a18a493edfee23a73..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_mobilenetv1.py +++ /dev/null @@ -1,299 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import KaimingNormal -import math - -__all__ = ["MobileNetV1_x0_25", "MobileNetV1_x0_5", "MobileNetV1_x0_75", "MobileNetV1"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - act="relu", - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(initializer=KaimingNormal(), name=name + "_weights"), - bias_attr=False, - ) - - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class DepthwiseSeparable(nn.Layer): - def __init__( - self, - num_channels, - num_filters1, - num_filters2, - num_groups, - stride, - scale, - name=None, - ): - super(DepthwiseSeparable, self).__init__() - - self._depthwise_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=int(num_filters1 * scale), - filter_size=3, - stride=stride, - padding=1, - num_groups=int(num_groups * scale), - name=name + "_dw", - ) - - self._pointwise_conv = ConvBNLayer( - num_channels=int(num_filters1 * scale), - filter_size=1, - num_filters=int(num_filters2 * scale), - stride=1, - padding=0, - name=name + "_sep", - ) - - def forward(self, inputs): - y = self._depthwise_conv(inputs) - y = self._pointwise_conv(y) - return y - - -class MobileNet(nn.Layer): - def __init__(self, scale=1.0, class_dim=1000): - super(MobileNet, self).__init__() - self.scale = scale - self.block_list = [] - - self.conv1 = ConvBNLayer( - num_channels=3, - filter_size=3, - channels=3, - num_filters=int(32 * scale), - stride=2, - padding=1, - name="conv1", - ) - - conv2_1 = self.add_sublayer( - "conv2_1", - sublayer=DepthwiseSeparable( - num_channels=int(32 * scale), - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, - scale=scale, - name="conv2_1", - ), - ) - self.block_list.append(conv2_1) - - conv2_2 = self.add_sublayer( - "conv2_2", - sublayer=DepthwiseSeparable( - num_channels=int(64 * scale), - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=2, - scale=scale, - name="conv2_2", - ), - ) - self.block_list.append(conv2_2) - - conv3_1 = self.add_sublayer( - "conv3_1", - sublayer=DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, - scale=scale, - name="conv3_1", - ), - ) - self.block_list.append(conv3_1) - - conv3_2 = self.add_sublayer( - "conv3_2", - sublayer=DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=256, - num_groups=128, - stride=2, - scale=scale, - name="conv3_2", - ), - ) - self.block_list.append(conv3_2) - - conv4_1 = self.add_sublayer( - "conv4_1", - sublayer=DepthwiseSeparable( - 
num_channels=int(256 * scale), - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, - scale=scale, - name="conv4_1", - ), - ) - self.block_list.append(conv4_1) - - conv4_2 = self.add_sublayer( - "conv4_2", - sublayer=DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=2, - scale=scale, - name="conv4_2", - ), - ) - self.block_list.append(conv4_2) - - for i in range(5): - conv5 = self.add_sublayer( - "conv5_" + str(i + 1), - sublayer=DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, - scale=scale, - name="conv5_" + str(i + 1), - ), - ) - self.block_list.append(conv5) - - conv5_6 = self.add_sublayer( - "conv5_6", - sublayer=DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=2, - scale=scale, - name="conv5_6", - ), - ) - self.block_list.append(conv5_6) - - conv6 = self.add_sublayer( - "conv6", - sublayer=DepthwiseSeparable( - num_channels=int(1024 * scale), - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=1, - scale=scale, - name="conv6", - ), - ) - self.block_list.append(conv6) - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - self.out = Linear( - int(1024 * scale), - class_dim, - weight_attr=ParamAttr(initializer=KaimingNormal(), name="fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset"), - ) - - def forward(self, inputs): - y = self.conv1(inputs) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.flatten(y, start_axis=1, stop_axis=-1) - y = self.out(y) - return y - - -def MobileNetV1_x0_25(**args): - model = MobileNet(scale=0.25, **args) - return model - - -def MobileNetV1_x0_5(**args): - model = MobileNet(scale=0.5, **args) - return model - - -def MobileNetV1_x0_75(**args): - model = MobileNet(scale=0.75, **args) - return model - - -def MobileNetV1(**args): - model = MobileNet(scale=1.0, **args) - return model - - -def test_MobileNetV1(): - load_paddle_module_and_check( - MobileNetV1, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_mobilenetv2.py b/examples/x2oneflow/paddle2oneflow/models/test_mobilenetv2.py deleted file mode 100644 index dcc2cee1b3273ff978962c24e0da6ee67d1e1c51..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_mobilenetv2.py +++ /dev/null @@ -1,286 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D - -import math - -__all__ = [ - "MobileNetV2_x0_25", - "MobileNetV2_x0_5", - "MobileNetV2_x0_75", - "MobileNetV2", - "MobileNetV2_x1_5", - "MobileNetV2_x2_0", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - name=None, - use_cudnn=True, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - - self._batch_norm = BatchNorm( - num_filters, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name=name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, inputs, if_act=True): - y = self._conv(inputs) - y = self._batch_norm(y) - if if_act: - y = F.relu6(y) - return y - - -class InvertedResidualUnit(nn.Layer): - def __init__( - self, - num_channels, - num_in_filter, - num_filters, - stride, - filter_size, - padding, - expansion_factor, - name, - ): - super(InvertedResidualUnit, self).__init__() - num_expfilter = int(round(num_in_filter * expansion_factor)) - self._expand_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - name=name + "_expand", - ) - - self._bottleneck_conv = ConvBNLayer( - num_channels=num_expfilter, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - use_cudnn=False, - name=name + "_dwise", - ) - - self._linear_conv = ConvBNLayer( - num_channels=num_expfilter, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - name=name + "_linear", - ) - - def forward(self, inputs, ifshortcut): - y = self._expand_conv(inputs, if_act=True) - y = self._bottleneck_conv(y, if_act=True) - y = self._linear_conv(y, if_act=False) - if ifshortcut: - y = paddle.add(inputs, y) - return y - - -class InvresiBlocks(nn.Layer): - def __init__(self, in_c, t, c, n, s, name): - super(InvresiBlocks, self).__init__() - - self._first_block = InvertedResidualUnit( - num_channels=in_c, - num_in_filter=in_c, - num_filters=c, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + "_1", - ) - - self._block_list = [] - for i in range(1, n): - block = self.add_sublayer( - name + "_" + str(i + 1), - sublayer=InvertedResidualUnit( - num_channels=c, - num_in_filter=c, - num_filters=c, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + "_" + str(i + 1), - ), - ) - self._block_list.append(block) - - def forward(self, inputs): - y = self._first_block(inputs, ifshortcut=False) - for block in self._block_list: - y = block(y, ifshortcut=True) - return y - - -class MobileNet(nn.Layer): - def __init__(self, class_dim=1000, scale=1.0, prefix_name="", **args): - super(MobileNet, self).__init__() - self.scale = scale - self.class_dim = class_dim 
- - bottleneck_params_list = [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 3, 2), - (6, 64, 4, 2), - (6, 96, 3, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), - ] - - self.conv1 = ConvBNLayer( - num_channels=3, - num_filters=int(32 * scale), - filter_size=3, - stride=2, - padding=1, - name=prefix_name + "conv1_1", - ) - - self.block_list = [] - i = 1 - in_c = int(32 * scale) - for layer_setting in bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - block = self.add_sublayer( - prefix_name + "conv" + str(i), - sublayer=InvresiBlocks( - in_c=in_c, - t=t, - c=int(c * scale), - n=n, - s=s, - name=prefix_name + "conv" + str(i), - ), - ) - self.block_list.append(block) - in_c = int(c * scale) - - self.out_c = int(1280 * scale) if scale > 1.0 else 1280 - self.conv9 = ConvBNLayer( - num_channels=in_c, - num_filters=self.out_c, - filter_size=1, - stride=1, - padding=0, - name=prefix_name + "conv9", - ) - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - self.out = Linear( - self.out_c, - class_dim, - weight_attr=ParamAttr(name=prefix_name + "fc10_weights"), - bias_attr=ParamAttr(name=prefix_name + "fc10_offset"), - ) - - def forward(self, inputs): - y = self.conv1(inputs, if_act=True) - for block in self.block_list: - y = block(y) - y = self.conv9(y, if_act=True) - y = self.pool2d_avg(y) - y = paddle.flatten(y, start_axis=1, stop_axis=-1) - y = self.out(y) - return y - - -def MobileNetV2_x0_25(**args): - model = MobileNet(scale=0.25, **args) - return model - - -def MobileNetV2_x0_5(**args): - model = MobileNet(scale=0.5, **args) - return model - - -def MobileNetV2_x0_75(**args): - model = MobileNet(scale=0.75, **args) - return model - - -def MobileNetV2(**args): - model = MobileNet(scale=1.0, **args) - return model - - -def MobileNetV2_x1_5(**args): - model = MobileNet(scale=1.5, **args) - return model - - -def MobileNetV2_x2_0(**args): - model = MobileNet(scale=2.0, **args) - return model - - -def test_MobileNetV2(): - load_paddle_module_and_check( - MobileNetV2, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) - diff --git a/examples/x2oneflow/paddle2oneflow/models/test_mobilenetv3.py b/examples/x2oneflow/paddle2oneflow/models/test_mobilenetv3.py deleted file mode 100644 index c3b4a204f1e4e4ca3da79feabbcb4544cbc0dcb4..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_mobilenetv3.py +++ /dev/null @@ -1,377 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn.functional import hardswish, hardsigmoid -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.regularizer import L2Decay - -import math - -__all__ = [ - "MobileNetV3_small_x0_35", - "MobileNetV3_small_x0_5", - "MobileNetV3_small_x0_75", - "MobileNetV3_small_x1_0", - "MobileNetV3_small_x1_25", - "MobileNetV3_large_x0_35", - "MobileNetV3_large_x0_5", - "MobileNetV3_large_x0_75", - "MobileNetV3_large_x1_0", - "MobileNetV3_large_x1_25", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def make_divisible(v, divisor=8, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class MobileNetV3(nn.Layer): - def __init__(self, scale=1.0, model_name="small", dropout_prob=0.2, class_dim=1000): - super(MobileNetV3, self).__init__() - - inplanes = 16 - if model_name == "large": - self.cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, "relu", 1], - [3, 64, 24, False, "relu", 2], - [3, 72, 24, False, "relu", 1], - [5, 72, 40, True, "relu", 2], - [5, 120, 40, True, "relu", 1], - [5, 120, 40, True, "relu", 1], - [3, 240, 80, False, "hardswish", 2], - [3, 200, 80, False, "hardswish", 1], - [3, 184, 80, False, "hardswish", 1], - [3, 184, 80, False, "hardswish", 1], - [3, 480, 112, True, "hardswish", 1], - [3, 672, 112, True, "hardswish", 1], - [5, 672, 160, True, "hardswish", 2], - [5, 960, 160, True, "hardswish", 1], - [5, 960, 160, True, "hardswish", 1], - ] - self.cls_ch_squeeze = 960 - self.cls_ch_expand = 1280 - elif model_name == "small": - self.cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, "relu", 2], - [3, 72, 24, False, "relu", 2], - [3, 88, 24, False, "relu", 1], - [5, 96, 40, True, "hardswish", 2], - [5, 240, 40, True, "hardswish", 1], - [5, 240, 40, True, "hardswish", 1], - [5, 120, 48, True, "hardswish", 1], - [5, 144, 48, True, "hardswish", 1], - [5, 288, 96, True, "hardswish", 2], - [5, 576, 96, True, "hardswish", 1], - [5, 576, 96, True, "hardswish", 1], - ] - self.cls_ch_squeeze = 576 - self.cls_ch_expand = 1280 - else: - raise NotImplementedError( - "mode[{}_model] is not implemented!".format(model_name) - ) - - self.conv1 = ConvBNLayer( - in_c=3, - out_c=make_divisible(inplanes * scale), - filter_size=3, - stride=2, - padding=1, - num_groups=1, - if_act=True, - act="hardswish", - name="conv1", - ) - - self.block_list = [] - i = 0 - inplanes = make_divisible(inplanes * scale) - for (k, exp, c, se, nl, s) in self.cfg: - block = self.add_sublayer( - "conv" + str(i + 2), - ResidualUnit( - in_c=inplanes, - mid_c=make_divisible(scale * exp), - out_c=make_divisible(scale * c), - filter_size=k, - stride=s, - use_se=se, - act=nl, - name="conv" + str(i + 2), - ), - ) - self.block_list.append(block) - inplanes = make_divisible(scale * c) - i += 1 - - self.last_second_conv = ConvBNLayer( - in_c=inplanes, - out_c=make_divisible(scale * self.cls_ch_squeeze), - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - act="hardswish", - name="conv_last", - ) - - self.pool = AdaptiveAvgPool2D(1) - - self.last_conv = Conv2D( - in_channels=make_divisible(scale * self.cls_ch_squeeze), 
- out_channels=self.cls_ch_expand, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name="last_1x1_conv_weights"), - bias_attr=False, - ) - - self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") - - self.out = Linear( - self.cls_ch_expand, - class_dim, - weight_attr=ParamAttr("fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, inputs, label=None): - x = self.conv1(inputs) - - for block in self.block_list: - x = block(x) - - x = self.last_second_conv(x) - x = self.pool(x) - - x = self.last_conv(x) - x = hardswish(x) - x = self.dropout(x) - x = paddle.flatten(x, start_axis=1, stop_axis=-1) - x = self.out(x) - - return x - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_c, - out_c, - filter_size, - stride, - padding, - num_groups=1, - if_act=True, - act=None, - use_cudnn=True, - name="", - ): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = Conv2D( - in_channels=in_c, - out_channels=out_c, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - self.bn = BatchNorm( - num_channels=out_c, - act=None, - param_attr=ParamAttr(name=name + "_bn_scale", regularizer=L2Decay(0.0)), - bias_attr=ParamAttr(name=name + "_bn_offset", regularizer=L2Decay(0.0)), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - if self.if_act: - if self.act == "relu": - x = F.relu(x) - elif self.act == "hardswish": - x = hardswish(x) - else: - print("The activation function is selected incorrectly.") - exit() - return x - - -class ResidualUnit(nn.Layer): - def __init__( - self, in_c, mid_c, out_c, filter_size, stride, use_se, act=None, name="" - ): - super(ResidualUnit, self).__init__() - self.if_shortcut = stride == 1 and in_c == out_c - self.if_se = use_se - - self.expand_conv = ConvBNLayer( - in_c=in_c, - out_c=mid_c, - filter_size=1, - stride=1, - padding=0, - if_act=True, - act=act, - name=name + "_expand", - ) - self.bottleneck_conv = ConvBNLayer( - in_c=mid_c, - out_c=mid_c, - filter_size=filter_size, - stride=stride, - padding=int((filter_size - 1) // 2), - num_groups=mid_c, - if_act=True, - act=act, - name=name + "_depthwise", - ) - if self.if_se: - self.mid_se = SEModule(mid_c, name=name + "_se") - self.linear_conv = ConvBNLayer( - in_c=mid_c, - out_c=out_c, - filter_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name=name + "_linear", - ) - - def forward(self, inputs): - x = self.expand_conv(inputs) - x = self.bottleneck_conv(x) - if self.if_se: - x = self.mid_se(x) - x = self.linear_conv(x) - if self.if_shortcut: - x = paddle.add(inputs, x) - return x - - -class SEModule(nn.Layer): - def __init__(self, channel, reduction=4, name=""): - super(SEModule, self).__init__() - self.avg_pool = AdaptiveAvgPool2D(1) - self.conv1 = Conv2D( - in_channels=channel, - out_channels=channel // reduction, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name=name + "_1_weights"), - bias_attr=ParamAttr(name=name + "_1_offset"), - ) - self.conv2 = Conv2D( - in_channels=channel // reduction, - out_channels=channel, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name + "_2_weights"), - bias_attr=ParamAttr(name=name + "_2_offset"), - ) - - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = F.relu(outputs) - outputs = 
self.conv2(outputs) - outputs = hardsigmoid(outputs, slope=0.2, offset=0.5) - return paddle.multiply(x=inputs, y=outputs) - - -def MobileNetV3_small_x0_35(**args): - model = MobileNetV3(model_name="small", scale=0.35, **args) - return model - - -def MobileNetV3_small_x0_5(**args): - model = MobileNetV3(model_name="small", scale=0.5, **args) - return model - - -def MobileNetV3_small_x0_75(**args): - model = MobileNetV3(model_name="small", scale=0.75, **args) - return model - - -def MobileNetV3_small_x1_0(**args): - model = MobileNetV3(model_name="small", scale=1.0, **args) - return model - - -def MobileNetV3_small_x1_25(**args): - model = MobileNetV3(model_name="small", scale=1.25, **args) - return model - - -def MobileNetV3_large_x0_35(**args): - model = MobileNetV3(model_name="large", scale=0.35, **args) - return model - - -def MobileNetV3_large_x0_5(**args): - model = MobileNetV3(model_name="large", scale=0.5, **args) - return model - - -def MobileNetV3_large_x0_75(**args): - model = MobileNetV3(model_name="large", scale=0.75, **args) - return model - - -def MobileNetV3_large_x1_0(**args): - model = MobileNetV3(model_name="large", scale=1.0, **args) - return model - - -def MobileNetV3_large_x1_25(**args): - model = MobileNetV3(model_name="large", scale=1.25, **args) - return model - - -def test_MobileNetV3(): - load_paddle_module_and_check( - MobileNetV3, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_regnet.py b/examples/x2oneflow/paddle2oneflow/models/test_regnet.py deleted file mode 100644 index 97d0a8a0ba7f3e86c1342b4333e2400ff4445f0b..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_regnet.py +++ /dev/null @@ -1,398 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math - -__all__ = [ - "RegNetX_200MF", - "RegNetX_4GF", - "RegNetX_32GF", - "RegNetY_200MF", - "RegNetY_4GF", - "RegNetY_32GF", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def quantize_float(f, q): - """Converts a float to closest non-zero int divisible by q.""" - return int(round(f / q) * q) - - -def adjust_ws_gs_comp(ws, bms, gs): - """Adjusts the compatibility of widths and groups.""" - ws_bot = [int(w * b) for w, b in zip(ws, bms)] - gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)] - ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)] - ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)] - return ws, gs - - -def get_stages_from_blocks(ws, rs): - """Gets ws/ds of network at each stage from per block values.""" - ts = [ - w != wp or r != rp - for w, wp, r, rp in zip(ws + [0], [0] + ws, rs + [0], [0] + rs) - ] - s_ws = [w for w, t in zip(ws, ts[:-1]) if t] - s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist() - return s_ws, s_ds - - -def generate_regnet(w_a, w_0, w_m, d, q=8): - """Generates per block ws from RegNet parameters.""" - assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0 - ws_cont = np.arange(d) * w_a + w_0 - ks = np.round(np.log(ws_cont / w_0) / np.log(w_m)) - ws = w_0 * np.power(w_m, ks) - ws = np.round(np.divide(ws, q)) * q - num_stages, max_stage = len(np.unique(ws)), ks.max() + 1 - ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist() - return ws, num_stages, max_stage, ws_cont - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - padding=0, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + ".conv2d.output.1.w_0"), - bias_attr=ParamAttr(name=name + ".conv2d.output.1.b_0"), - ) - bn_name = name + "_bn" - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + ".output.1.w_0"), - bias_attr=ParamAttr(bn_name + ".output.1.b_0"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - stride, - bm, - gw, - se_on, - se_r, - shortcut=True, - name=None, - ): - super(BottleneckBlock, self).__init__() - - # Compute the bottleneck width - w_b = int(round(num_filters * bm)) - # Compute the number of groups - num_gs = w_b // gw - self.se_on = se_on - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=w_b, - filter_size=1, - padding=0, - act="relu", - name=name + "_branch2a", - ) - self.conv1 = ConvBNLayer( - num_channels=w_b, - num_filters=w_b, - filter_size=3, - stride=stride, - padding=1, - groups=num_gs, - act="relu", - name=name + "_branch2b", - ) - if se_on: - w_se = int(round(num_channels * se_r)) - self.se_block = SELayer( - 
num_channels=w_b, - num_filters=w_b, - reduction_ratio=w_se, - name=name + "_branch2se", - ) - self.conv2 = ConvBNLayer( - num_channels=w_b, - num_filters=num_filters, - filter_size=1, - act=None, - name=name + "_branch2c", - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1", - ) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - if self.se_on: - conv1 = self.se_block(conv1) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class SELayer(nn.Layer): - def __init__(self, num_channels, num_filters, reduction_ratio, name=None): - super(SELayer, self).__init__() - - self.pool2d_gap = AdaptiveAvgPool2D(1) - - self._num_channels = num_channels - - med_ch = int(num_channels / reduction_ratio) - stdv = 1.0 / math.sqrt(num_channels * 1.0) - self.squeeze = Linear( - num_channels, - med_ch, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights" - ), - bias_attr=ParamAttr(name=name + "_sqz_offset"), - ) - - stdv = 1.0 / math.sqrt(med_ch * 1.0) - self.excitation = Linear( - med_ch, - num_filters, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_exc_weights" - ), - bias_attr=ParamAttr(name=name + "_exc_offset"), - ) - - def forward(self, input): - pool = self.pool2d_gap(input) - pool = paddle.reshape(pool, shape=[-1, self._num_channels]) - squeeze = self.squeeze(pool) - squeeze = F.relu(squeeze) - excitation = self.excitation(squeeze) - excitation = F.sigmoid(excitation) - excitation = paddle.reshape(excitation, shape=[-1, self._num_channels, 1, 1]) - out = input * excitation - return out - - -class RegNet(nn.Layer): - def __init__( - self, w_a, w_0, w_m, d, group_w, bot_mul, q=8, se_on=False, class_dim=1000 - ): - super(RegNet, self).__init__() - - # Generate RegNet ws per block - b_ws, num_s, max_s, ws_cont = generate_regnet(w_a, w_0, w_m, d, q) - # Convert to per stage format - ws, ds = get_stages_from_blocks(b_ws, b_ws) - # Generate group widths and bot muls - gws = [group_w for _ in range(num_s)] - bms = [bot_mul for _ in range(num_s)] - # Adjust the compatibility of ws and gws - ws, gws = adjust_ws_gs_comp(ws, bms, gws) - # Use the same stride for each stage - ss = [2 for _ in range(num_s)] - # Use SE for RegNetY - se_r = 0.25 - # Construct the model - # Group params by stage - stage_params = list(zip(ds, ws, ss, bms, gws)) - # Construct the stem - stem_type = "simple_stem_in" - stem_w = 32 - block_type = "res_bottleneck_block" - - self.conv = ConvBNLayer( - num_channels=3, - num_filters=stem_w, - filter_size=3, - stride=2, - padding=1, - act="relu", - name="stem_conv", - ) - - self.block_list = [] - for block, (d, w_out, stride, bm, gw) in enumerate(stage_params): - shortcut = False - for i in range(d): - num_channels = stem_w if block == i == 0 else in_channels - # Stride apply to the first block of the stage - b_stride = stride if i == 0 else 1 - conv_name = "s" + str(block + 1) + "_b" + str(i + 1) # chr(97 + i) - bottleneck_block = self.add_sublayer( - conv_name, - BottleneckBlock( - num_channels=num_channels, - num_filters=w_out, - stride=b_stride, - bm=bm, - gw=gw, - se_on=se_on, - se_r=se_r, - shortcut=shortcut, - name=conv_name, - ), - ) - in_channels = w_out - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg 
= AdaptiveAvgPool2D(1) - - self.pool2d_avg_channels = w_out - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear( - self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0"), - ) - - def forward(self, inputs): - y = self.conv(inputs) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y - - -def RegNetX_200MF(**args): - model = RegNet( - w_a=36.44, w_0=24, w_m=2.49, d=13, group_w=8, bot_mul=1.0, q=8, **args - ) - return model - - -def RegNetX_4GF(**args): - model = RegNet( - w_a=38.65, w_0=96, w_m=2.43, d=23, group_w=40, bot_mul=1.0, q=8, **args - ) - return model - - -def RegNetX_32GF(**args): - model = RegNet( - w_a=69.86, w_0=320, w_m=2.0, d=23, group_w=168, bot_mul=1.0, q=8, **args - ) - return model - - -def RegNetY_200MF(**args): - model = RegNet( - w_a=36.44, - w_0=24, - w_m=2.49, - d=13, - group_w=8, - bot_mul=1.0, - q=8, - se_on=True, - **args - ) - return model - - -def RegNetY_4GF(**args): - model = RegNet( - w_a=31.41, - w_0=96, - w_m=2.24, - d=22, - group_w=64, - bot_mul=1.0, - q=8, - se_on=True, - **args - ) - return model - - -def RegNetY_32GF(**args): - model = RegNet( - w_a=115.89, - w_0=232, - w_m=2.53, - d=20, - group_w=232, - bot_mul=1.0, - q=8, - se_on=True, - **args - ) - return model - - -def test_RegNetX_200MF(): - load_paddle_module_and_check( - RegNetX_200MF, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) - diff --git a/examples/x2oneflow/paddle2oneflow/models/test_repvgg.py b/examples/x2oneflow/paddle2oneflow/models/test_repvgg.py deleted file mode 100644 index d314e2fd4cfd80bda4e57f2c3da62328f92b850c..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_repvgg.py +++ /dev/null @@ -1,384 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle.nn as nn -import paddle -import numpy as np - -__all__ = [ - "RepVGG", - "RepVGG_A0", - "RepVGG_A1", - "RepVGG_A2", - "RepVGG_B0", - "RepVGG_B1", - "RepVGG_B2", - "RepVGG_B3", - "RepVGG_B1g2", - "RepVGG_B1g4", - "RepVGG_B2g2", - "RepVGG_B2g4", - "RepVGG_B3g2", - "RepVGG_B3g4", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBN(nn.Layer): - def __init__( - self, in_channels, out_channels, kernel_size, stride, padding, groups=1 - ): - super(ConvBN, self).__init__() - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - bias_attr=False, - ) - self.bn = nn.BatchNorm2D(num_features=out_channels) - - def forward(self, x): - y = self.conv(x) - y = self.bn(y) - return y - - -class RepVGGBlock(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - padding_mode="zeros", - ): - super(RepVGGBlock, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.padding = padding - self.dilation = dilation - self.groups = groups - self.padding_mode = padding_mode - - assert kernel_size == 3 - assert padding == 1 - - padding_11 = padding - kernel_size // 2 - - self.nonlinearity = nn.ReLU() - - self.rbr_identity = ( - nn.BatchNorm2D(num_features=in_channels) - if out_channels == in_channels and stride == 1 - else None - ) - self.rbr_dense = ConvBN( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - ) - self.rbr_1x1 = ConvBN( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=stride, - padding=padding_11, - groups=groups, - ) - - def forward(self, inputs): - if not self.training: - return self.nonlinearity(self.rbr_reparam(inputs)) - - if self.rbr_identity is None: - id_out = 0 - else: - id_out = self.rbr_identity(inputs) - return self.nonlinearity(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out) - - def eval(self): - if not hasattr(self, "rbr_reparam"): - self.rbr_reparam = nn.Conv2D( - in_channels=self.in_channels, - out_channels=self.out_channels, - kernel_size=self.kernel_size, - stride=self.stride, - padding=self.padding, - dilation=self.dilation, - groups=self.groups, - padding_mode=self.padding_mode, - ) - self.training = False - kernel, bias = self.get_equivalent_kernel_bias() - self.rbr_reparam.weight.set_value(kernel) - self.rbr_reparam.bias.set_value(bias) - for layer in self.sublayers(): - layer.eval() - - def get_equivalent_kernel_bias(self): - kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) - kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) - kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) - return ( - kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, - bias3x3 + bias1x1 + biasid, - ) - - def _pad_1x1_to_3x3_tensor(self, kernel1x1): - if kernel1x1 is None: - return 0 - else: - return nn.functional.pad(kernel1x1, [1, 1, 1, 1]) - - def _fuse_bn_tensor(self, branch): - if branch is None: - return 0, 0 - if isinstance(branch, ConvBN): - kernel = branch.conv.weight - running_mean = branch.bn._mean - running_var = branch.bn._variance - gamma = branch.bn.weight - beta = branch.bn.bias - eps = branch.bn._epsilon - else: - assert isinstance(branch, nn.BatchNorm2D) - if not hasattr(self, "id_tensor"): - input_dim 
= self.in_channels // self.groups - kernel_value = np.zeros( - (self.in_channels, input_dim, 3, 3), dtype=np.float32 - ) - for i in range(self.in_channels): - kernel_value[i, i % input_dim, 1, 1] = 1 - self.id_tensor = paddle.to_tensor(kernel_value) - kernel = self.id_tensor - running_mean = branch._mean - running_var = branch._variance - gamma = branch.weight - beta = branch.bias - eps = branch._epsilon - std = (running_var + eps).sqrt() - t = (gamma / std).reshape((-1, 1, 1, 1)) - return kernel * t, beta - running_mean * gamma / std - - -class RepVGG(nn.Layer): - def __init__( - self, - num_blocks, - width_multiplier=None, - override_groups_map=None, - class_dim=1000, - ): - super(RepVGG, self).__init__() - - assert len(width_multiplier) == 4 - self.override_groups_map = override_groups_map or dict() - - assert 0 not in self.override_groups_map - - self.in_planes = min(64, int(64 * width_multiplier[0])) - - self.stage0 = RepVGGBlock( - in_channels=3, - out_channels=self.in_planes, - kernel_size=3, - stride=2, - padding=1, - ) - self.cur_layer_idx = 1 - self.stage1 = self._make_stage( - int(64 * width_multiplier[0]), num_blocks[0], stride=2 - ) - self.stage2 = self._make_stage( - int(128 * width_multiplier[1]), num_blocks[1], stride=2 - ) - self.stage3 = self._make_stage( - int(256 * width_multiplier[2]), num_blocks[2], stride=2 - ) - self.stage4 = self._make_stage( - int(512 * width_multiplier[3]), num_blocks[3], stride=2 - ) - self.gap = nn.AdaptiveAvgPool2D(output_size=1) - self.linear = nn.Linear(int(512 * width_multiplier[3]), class_dim) - - def _make_stage(self, planes, num_blocks, stride): - strides = [stride] + [1] * (num_blocks - 1) - blocks = [] - for stride in strides: - cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1) - blocks.append( - RepVGGBlock( - in_channels=self.in_planes, - out_channels=planes, - kernel_size=3, - stride=stride, - padding=1, - groups=cur_groups, - ) - ) - self.in_planes = planes - self.cur_layer_idx += 1 - return nn.Sequential(*blocks) - - def forward(self, x): - out = self.stage0(x) - out = self.stage1(out) - out = self.stage2(out) - out = self.stage3(out) - out = self.stage4(out) - out = self.gap(out) - out = paddle.flatten(out, start_axis=1) - out = self.linear(out) - return out - - -optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26] -g2_map = {l: 2 for l in optional_groupwise_layers} -g4_map = {l: 4 for l in optional_groupwise_layers} - - -def RepVGG_A0(**kwargs): - return RepVGG( - num_blocks=[2, 4, 14, 1], - width_multiplier=[0.75, 0.75, 0.75, 2.5], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_A1(**kwargs): - return RepVGG( - num_blocks=[2, 4, 14, 1], - width_multiplier=[1, 1, 1, 2.5], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_A2(**kwargs): - return RepVGG( - num_blocks=[2, 4, 14, 1], - width_multiplier=[1.5, 1.5, 1.5, 2.75], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_B0(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[1, 1, 1, 2.5], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_B1(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2, 2, 2, 4], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_B1g2(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2, 2, 2, 4], - override_groups_map=g2_map, - **kwargs - ) - - -def RepVGG_B1g4(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2, 2, 2, 4], - override_groups_map=g4_map, - **kwargs - ) - - 
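-# Note on re-parameterization: RepVGGBlock.eval() folds the three training-time
-# branches into the single rbr_reparam 3x3 conv. Fusing a conv with its
-# BatchNorm gives W' = W * gamma / sqrt(var + eps) and
-# b' = beta - mean * gamma / sqrt(var + eps); the 1x1 kernel is zero-padded to
-# 3x3, the identity branch is written as a 3x3 kernel with a single 1 at the
-# center of its own channel, and the three fused kernel/bias pairs are summed.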
-def RepVGG_B2(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2.5, 2.5, 2.5, 5], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_B2g2(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2.5, 2.5, 2.5, 5], - override_groups_map=g2_map, - **kwargs - ) - - -def RepVGG_B2g4(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2.5, 2.5, 2.5, 5], - override_groups_map=g4_map, - **kwargs - ) - - -def RepVGG_B3(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[3, 3, 3, 5], - override_groups_map=None, - **kwargs - ) - - -def RepVGG_B3g2(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[3, 3, 3, 5], - override_groups_map=g2_map, - **kwargs - ) - - -def RepVGG_B3g4(**kwargs): - return RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[3, 3, 3, 5], - override_groups_map=g4_map, - **kwargs - ) - - -def test_RepVGG_A0(): - load_paddle_module_and_check( - RepVGG_A0, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_res2net.py b/examples/x2oneflow/paddle2oneflow/models/test_res2net.py deleted file mode 100644 index 054a9e39c8037b92c9a7b384a20a8b2128932ae1..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_res2net.py +++ /dev/null @@ -1,303 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform - -import math - -__all__ = [ - "Res2Net50_48w_2s", - "Res2Net50_26w_4s", - "Res2Net50_14w_8s", - "Res2Net50_48w_2s", - "Res2Net50_26w_6s", - "Res2Net50_26w_8s", - "Res2Net101_26w_4s", - "Res2Net152_26w_4s", - "Res2Net200_26w_4s", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, - num_channels1, - num_channels2, - num_filters, - stride, - scales, - shortcut=True, - if_first=False, - name=None, - ): - super(BottleneckBlock, self).__init__() - self.stride = stride - self.scales = scales - self.conv0 = ConvBNLayer( - num_channels=num_channels1, - num_filters=num_filters, - filter_size=1, - act="relu", - name=name + "_branch2a", - ) - self.conv1_list = [] - for s in range(scales - 1): - conv1 = self.add_sublayer( - name + "_branch2b_" + str(s + 1), - ConvBNLayer( - num_channels=num_filters // scales, - num_filters=num_filters // scales, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2b_" + str(s + 1), - ), - ) - self.conv1_list.append(conv1) - self.pool2d_avg = AvgPool2D(kernel_size=3, stride=stride, padding=1) - - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_channels2, - filter_size=1, - act=None, - name=name + "_branch2c", - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels1, - num_filters=num_channels2, - filter_size=1, - stride=stride, - name=name + "_branch1", - ) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - xs = paddle.split(y, self.scales, 1) - ys = [] - for s, conv1 in enumerate(self.conv1_list): - if s == 0 or self.stride == 2: - ys.append(conv1(xs[s])) - else: - ys.append(conv1(paddle.add(xs[s], ys[-1]))) - if self.stride == 1: - ys.append(xs[-1]) - else: - ys.append(self.pool2d_avg(xs[-1])) - conv1 = paddle.concat(ys, axis=1) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class Res2Net(nn.Layer): - def __init__(self, layers=50, scales=4, width=26, class_dim=1000): - super(Res2Net, self).__init__() - - self.layers = layers - self.scales = scales - self.width = width - basic_width = self.width * self.scales - 
supported_layers = [50, 101, 152, 200] - assert ( - layers in supported_layers - ), "supported layers are {} but input layer is {}".format( - supported_layers, layers - ) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, 1024] - num_channels2 = [256, 512, 1024, 2048] - num_filters = [basic_width * t for t in [1, 2, 4, 8]] - - self.conv1 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act="relu", - name="conv1", - ) - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - "bb_%d_%d" % (block, i), - BottleneckBlock( - num_channels1=num_channels[block] - if i == 0 - else num_channels2[block], - num_channels2=num_channels2[block], - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - scales=scales, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name, - ), - ) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear( - self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, inputs): - y = self.conv1(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y - - -def Res2Net50_48w_2s(**args): - model = Res2Net(layers=50, scales=2, width=48, **args) - return model - - -def Res2Net50_26w_4s(**args): - model = Res2Net(layers=50, scales=4, width=26, **args) - return model - - -def Res2Net50_14w_8s(**args): - model = Res2Net(layers=50, scales=8, width=14, **args) - return model - - -def Res2Net50_26w_6s(**args): - model = Res2Net(layers=50, scales=6, width=26, **args) - return model - - -def Res2Net50_26w_8s(**args): - model = Res2Net(layers=50, scales=8, width=26, **args) - return model - - -def Res2Net101_26w_4s(**args): - model = Res2Net(layers=101, scales=4, width=26, **args) - return model - - -def Res2Net152_26w_4s(**args): - model = Res2Net(layers=152, scales=4, width=26, **args) - return model - - -def Res2Net200_26w_4s(**args): - model = Res2Net(layers=200, scales=4, width=26, **args) - return model - - -def test_Res2Net50_48w_2s(): - load_paddle_module_and_check( - Res2Net50_48w_2s, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_resnet.py b/examples/x2oneflow/paddle2oneflow/models/test_resnet.py deleted file mode 100644 index 17feee63d0b388a19d4dedb39675aed84408c586..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_resnet.py +++ /dev/null @@ -1,348 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform - -import math - -__all__ = ["ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - data_format="NCHW", - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - data_format=data_format, - ) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - data_layout=data_format, - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - stride, - shortcut=True, - name=None, - data_format="NCHW", - ): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", - name=name + "_branch2a", - data_format=data_format, - ) - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2b", - data_format=data_format, - ) - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c", - data_format=data_format, - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride, - name=name + "_branch1", - data_format=data_format, - ) - - self.shortcut = shortcut - - self._num_channels_out = num_filters * 4 - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - stride, - shortcut=True, - name=None, - data_format="NCHW", - ): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - 
num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2a", - data_format=data_format, - ) - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b", - data_format=data_format, - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1", - data_format=data_format, - ) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet(nn.Layer): - def __init__( - self, layers=50, class_dim=1000, input_image_channel=3, data_format="NCHW" - ): - super(ResNet, self).__init__() - - self.layers = layers - self.data_format = data_format - self.input_image_channel = input_image_channel - - supported_layers = [18, 34, 50, 101, 152] - assert ( - layers in supported_layers - ), "supported layers are {} but input layer is {}".format( - supported_layers, layers - ) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.conv = ConvBNLayer( - num_channels=self.input_image_channel, - num_filters=64, - filter_size=7, - stride=2, - act="relu", - name="conv1", - data_format=self.data_format, - ) - self.pool2d_max = MaxPool2D( - kernel_size=3, stride=2, padding=1, data_format=self.data_format - ) - - self.block_list = [] - if layers >= 50: - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - conv_name, - BottleneckBlock( - num_channels=num_channels[block] - if i == 0 - else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - name=conv_name, - data_format=self.data_format, - ), - ) - self.block_list.append(bottleneck_block) - shortcut = True - else: - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - conv_name, - BasicBlock( - num_channels=num_channels[block] - if i == 0 - else num_filters[block], - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - name=conv_name, - data_format=self.data_format, - ), - ) - self.block_list.append(basic_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2D(1, data_format=self.data_format) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear( - self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0"), - ) - - def forward(self, inputs): - with paddle.static.amp.fp16_guard(): - if self.data_format == "NHWC": - inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1]) - 
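-                # The input was only transposed to match the NHWC layout; no
-                # gradient should flow back through it, hence stop_gradient.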
inputs.stop_gradient = True - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y - - -def ResNet18(**args): - model = ResNet(layers=18, **args) - return model - - -def ResNet34(**args): - model = ResNet(layers=34, **args) - return model - - -def ResNet50(**args): - model = ResNet(layers=50, **args) - return model - - -def ResNet101(**args): - model = ResNet(layers=101, **args) - return model - - -def ResNet152(**args): - model = ResNet(layers=152, **args) - return model - - -def test_ResNet18(): - load_paddle_module_and_check( - ResNet18, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_resnext.py b/examples/x2oneflow/paddle2oneflow/models/test_resnext.py deleted file mode 100644 index 40492587aec1cc15c1d57afb19190158ef86a9cf..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_resnext.py +++ /dev/null @@ -1,264 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform - -import math - -__all__ = [ - "ResNeXt50_32x4d", - "ResNeXt50_64x4d", - "ResNeXt101_32x4d", - "ResNeXt101_64x4d", - "ResNeXt152_32x4d", - "ResNeXt152_64x4d", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, num_channels, num_filters, stride, cardinality, shortcut=True, name=None - ): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", - name=name + "_branch2a", - ) - 
self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act="relu", - name=name + "_branch2b", - ) - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c", - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1", - ) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class ResNeXt(nn.Layer): - def __init__(self, layers=50, class_dim=1000, cardinality=32): - super(ResNeXt, self).__init__() - - self.layers = layers - self.cardinality = cardinality - supported_layers = [50, 101, 152] - assert ( - layers in supported_layers - ), "supported layers are {} but input layer is {}".format( - supported_layers, layers - ) - supported_cardinality = [32, 64] - assert ( - cardinality in supported_cardinality - ), "supported cardinality is {} but input cardinality is {}".format( - supported_cardinality, cardinality - ) - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] - num_filters = ( - [128, 256, 512, 1024] if cardinality == 32 else [256, 512, 1024, 2048] - ) - - self.conv = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act="relu", - name="res_conv1", - ) - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - "bb_%d_%d" % (block, i), - BottleneckBlock( - num_channels=num_channels[block] - if i == 0 - else num_filters[block] * int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - name=conv_name, - ), - ) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear( - self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, inputs): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y - - -def ResNeXt50_32x4d(**args): - model = ResNeXt(layers=50, cardinality=32, **args) - return model - - -def ResNeXt50_64x4d(**args): - model = ResNeXt(layers=50, cardinality=64, **args) - return model - - -def ResNeXt101_32x4d(**args): - model = ResNeXt(layers=101, cardinality=32, **args) - return model - - -def ResNeXt101_64x4d(**args): - model = 
ResNeXt(layers=101, cardinality=64, **args) - return model - - -def ResNeXt152_32x4d(**args): - model = ResNeXt(layers=152, cardinality=32, **args) - return model - - -def ResNeXt152_64x4d(**args): - model = ResNeXt(layers=152, cardinality=64, **args) - return model - - -def test_ResNeXt50_32x4d(): - load_paddle_module_and_check( - ResNeXt50_32x4d, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_se_resnext.py b/examples/x2oneflow/paddle2oneflow/models/test_se_resnext.py deleted file mode 100644 index d04cd0f77ecccced82d51059f515b768947b2ed1..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_se_resnext.py +++ /dev/null @@ -1,325 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform - -import math - -__all__ = ["SE_ResNeXt50_32x4d", "SE_ResNeXt101_32x4d", "SE_ResNeXt152_64x4d"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - bn_name = name + "_bn" - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - stride, - cardinality, - reduction_ratio, - shortcut=True, - if_first=False, - name=None, - ): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", - name="conv" + name + "_x1", - ) - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act="relu", - name="conv" + name + "_x2", - ) - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name="conv" + name + "_x3", - ) - self.scale = SELayer( - 
num_channels=num_filters * 2 if cardinality == 32 else num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - reduction_ratio=reduction_ratio, - name="fc" + name, - ) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=stride, - name="conv" + name + "_prj", - ) - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - scale = self.scale(conv2) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=scale) - y = F.relu(y) - return y - - -class SELayer(nn.Layer): - def __init__(self, num_channels, num_filters, reduction_ratio, name=None): - super(SELayer, self).__init__() - - self.pool2d_gap = AdaptiveAvgPool2D(1) - - self._num_channels = num_channels - - med_ch = int(num_channels / reduction_ratio) - stdv = 1.0 / math.sqrt(num_channels * 1.0) - self.squeeze = Linear( - num_channels, - med_ch, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights" - ), - bias_attr=ParamAttr(name=name + "_sqz_offset"), - ) - self.relu = nn.ReLU() - stdv = 1.0 / math.sqrt(med_ch * 1.0) - self.excitation = Linear( - med_ch, - num_filters, - weight_attr=ParamAttr( - initializer=Uniform(-stdv, stdv), name=name + "_exc_weights" - ), - bias_attr=ParamAttr(name=name + "_exc_offset"), - ) - self.sigmoid = nn.Sigmoid() - - def forward(self, input): - pool = self.pool2d_gap(input) - pool = paddle.squeeze(pool, axis=[2, 3]) - squeeze = self.squeeze(pool) - squeeze = self.relu(squeeze) - excitation = self.excitation(squeeze) - excitation = self.sigmoid(excitation) - excitation = paddle.unsqueeze(excitation, axis=[2, 3]) - out = input * excitation - return out - - -class ResNeXt(nn.Layer): - def __init__(self, layers=50, class_dim=1000, cardinality=32): - super(ResNeXt, self).__init__() - - self.layers = layers - self.cardinality = cardinality - self.reduction_ratio = 16 - supported_layers = [50, 101, 152] - assert ( - layers in supported_layers - ), "supported layers are {} but input layer is {}".format( - supported_layers, layers - ) - supported_cardinality = [32, 64] - assert ( - cardinality in supported_cardinality - ), "supported cardinality is {} but input cardinality is {}".format( - supported_cardinality, cardinality - ) - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] - num_filters = ( - [128, 256, 512, 1024] if cardinality == 32 else [256, 512, 1024, 2048] - ) - if layers < 152: - self.conv = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act="relu", - name="conv1", - ) - else: - self.conv1_1 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=3, - stride=2, - act="relu", - name="conv1", - ) - self.conv1_2 = ConvBNLayer( - num_channels=64, - num_filters=64, - filter_size=3, - stride=1, - act="relu", - name="conv2", - ) - self.conv1_3 = ConvBNLayer( - num_channels=64, - num_filters=128, - filter_size=3, - stride=1, - act="relu", - name="conv3", - ) - - self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - n = 1 if layers == 50 or layers == 101 else 3 - for block in range(len(depth)): - n += 1 - shortcut = False - for i in range(depth[block]): - bottleneck_block = self.add_sublayer( - "bb_%d_%d" % (block, 
i), - BottleneckBlock( - num_channels=num_channels[block] - if i == 0 - else num_filters[block] * int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - reduction_ratio=self.reduction_ratio, - shortcut=shortcut, - if_first=block == 0, - name=str(n) + "_" + str(i + 1), - ), - ) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2D(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear( - self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc6_weights"), - bias_attr=ParamAttr(name="fc6_offset"), - ) - - def forward(self, inputs): - if self.layers < 152: - y = self.conv(inputs) - else: - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y - - -def SE_ResNeXt50_32x4d(**args): - model = ResNeXt(layers=50, cardinality=32, **args) - return model - - -def SE_ResNeXt101_32x4d(**args): - model = ResNeXt(layers=101, cardinality=32, **args) - return model - - -def SE_ResNeXt152_64x4d(**args): - model = ResNeXt(layers=152, cardinality=64, **args) - return model - - -def test_SE_ResNeXt50_32x4d(): - load_paddle_module_and_check( - SE_ResNeXt50_32x4d, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_shufflenet_v2.py b/examples/x2oneflow/paddle2oneflow/models/test_shufflenet_v2.py deleted file mode 100644 index 062b2f5a3fadddc4b3388a663c142e3c49adccb6..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_shufflenet_v2.py +++ /dev/null @@ -1,339 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr, reshape, transpose, concat, split -from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear -from paddle.nn.initializer import KaimingNormal -from paddle.nn.functional import swish - -__all__ = [ - "ShuffleNetV2_x0_25", - "ShuffleNetV2_x0_33", - "ShuffleNetV2_x0_5", - "ShuffleNetV2_x1_0", - "ShuffleNetV2_x1_5", - "ShuffleNetV2_x2_0", - "ShuffleNetV2_swish", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def channel_shuffle(x, groups): - batch_size, num_channels, height, width = x.shape[0:4] - channels_per_group = num_channels // groups - - # reshape - x = reshape(x=x, shape=[batch_size, groups, channels_per_group, height, width]) - - # transpose - x = transpose(x=x, perm=[0, 2, 1, 3, 4]) - - # flatten - x = reshape(x=x, shape=[batch_size, num_channels, height, width]) - return x - - -class ConvBNLayer(Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(initializer=KaimingNormal(), name=name + "_weights"), - bias_attr=False, - ) - - self._batch_norm = BatchNorm( - out_channels, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name=name + "_bn_offset"), - act=act, - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class InvertedResidual(Layer): - def __init__(self, in_channels, out_channels, stride, act="relu", name=None): - super(InvertedResidual, self).__init__() - self._conv_pw = ConvBNLayer( - in_channels=in_channels // 2, - out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - act=act, - name="stage_" + name + "_conv1", - ) - self._conv_dw = ConvBNLayer( - in_channels=out_channels // 2, - out_channels=out_channels // 2, - kernel_size=3, - stride=stride, - padding=1, - groups=out_channels // 2, - act=None, - name="stage_" + name + "_conv2", - ) - self._conv_linear = ConvBNLayer( - in_channels=out_channels // 2, - out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - act=act, - name="stage_" + name + "_conv3", - ) - - def forward(self, inputs): - x1, x2 = split( - inputs, num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2], axis=1 - ) - x2 = self._conv_pw(x2) - x2 = self._conv_dw(x2) - x2 = self._conv_linear(x2) - out = concat([x1, x2], axis=1) - return channel_shuffle(out, 2) - - -class InvertedResidualDS(Layer): - def __init__(self, in_channels, out_channels, stride, act="relu", name=None): - super(InvertedResidualDS, self).__init__() - - # branch1 - self._conv_dw_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=in_channels, - kernel_size=3, - stride=stride, - padding=1, - groups=in_channels, - act=None, - name="stage_" + name + "_conv4", - ) - self._conv_linear_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - act=act, - name="stage_" + name + "_conv5", - ) - # branch2 - self._conv_pw_2 = ConvBNLayer( - in_channels=in_channels, - 
out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - act=act, - name="stage_" + name + "_conv1", - ) - self._conv_dw_2 = ConvBNLayer( - in_channels=out_channels // 2, - out_channels=out_channels // 2, - kernel_size=3, - stride=stride, - padding=1, - groups=out_channels // 2, - act=None, - name="stage_" + name + "_conv2", - ) - self._conv_linear_2 = ConvBNLayer( - in_channels=out_channels // 2, - out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - act=act, - name="stage_" + name + "_conv3", - ) - - def forward(self, inputs): - x1 = self._conv_dw_1(inputs) - x1 = self._conv_linear_1(x1) - x2 = self._conv_pw_2(inputs) - x2 = self._conv_dw_2(x2) - x2 = self._conv_linear_2(x2) - out = concat([x1, x2], axis=1) - - return channel_shuffle(out, 2) - - -class ShuffleNet(Layer): - def __init__(self, class_dim=1000, scale=1.0, act="relu"): - super(ShuffleNet, self).__init__() - self.scale = scale - self.class_dim = class_dim - stage_repeats = [4, 8, 4] - - if scale == 0.25: - stage_out_channels = [-1, 24, 24, 48, 96, 512] - elif scale == 0.33: - stage_out_channels = [-1, 24, 32, 64, 128, 512] - elif scale == 0.5: - stage_out_channels = [-1, 24, 48, 96, 192, 1024] - elif scale == 1.0: - stage_out_channels = [-1, 24, 116, 232, 464, 1024] - elif scale == 1.5: - stage_out_channels = [-1, 24, 176, 352, 704, 1024] - elif scale == 2.0: - stage_out_channels = [-1, 24, 224, 488, 976, 2048] - else: - raise NotImplementedError( - "This scale size:[" + str(scale) + "] is not implemented!" - ) - # 1. conv1 - self._conv1 = ConvBNLayer( - in_channels=3, - out_channels=stage_out_channels[1], - kernel_size=3, - stride=2, - padding=1, - act=act, - name="stage1_conv", - ) - self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1) - - # 2. bottleneck sequences - self._block_list = [] - for stage_id, num_repeat in enumerate(stage_repeats): - for i in range(num_repeat): - if i == 0: - block = self.add_sublayer( - name=str(stage_id + 2) + "_" + str(i + 1), - sublayer=InvertedResidualDS( - in_channels=stage_out_channels[stage_id + 1], - out_channels=stage_out_channels[stage_id + 2], - stride=2, - act=act, - name=str(stage_id + 2) + "_" + str(i + 1), - ), - ) - else: - block = self.add_sublayer( - name=str(stage_id + 2) + "_" + str(i + 1), - sublayer=InvertedResidual( - in_channels=stage_out_channels[stage_id + 2], - out_channels=stage_out_channels[stage_id + 2], - stride=1, - act=act, - name=str(stage_id + 2) + "_" + str(i + 1), - ), - ) - self._block_list.append(block) - # 3. last_conv - self._last_conv = ConvBNLayer( - in_channels=stage_out_channels[-2], - out_channels=stage_out_channels[-1], - kernel_size=1, - stride=1, - padding=0, - act=act, - name="conv5", - ) - # 4. pool - self._pool2d_avg = AdaptiveAvgPool2D(1) - self._out_c = stage_out_channels[-1] - # 5. 
fc - self._fc = Linear( - stage_out_channels[-1], - class_dim, - weight_attr=ParamAttr(name="fc6_weights"), - bias_attr=ParamAttr(name="fc6_offset"), - ) - - def forward(self, inputs): - y = self._conv1(inputs) - y = self._max_pool(y) - for inv in self._block_list: - y = inv(y) - y = self._last_conv(y) - y = self._pool2d_avg(y) - y = paddle.flatten(y, start_axis=1, stop_axis=-1) - y = self._fc(y) - return y - - -def ShuffleNetV2_x0_25(**args): - model = ShuffleNet(scale=0.25, **args) - return model - - -def ShuffleNetV2_x0_33(**args): - model = ShuffleNet(scale=0.33, **args) - return model - - -def ShuffleNetV2_x0_5(**args): - model = ShuffleNet(scale=0.5, **args) - return model - - -def ShuffleNetV2_x1_0(**args): - model = ShuffleNet(scale=1.0, **args) - return model - - -def ShuffleNetV2_x1_5(**args): - model = ShuffleNet(scale=1.5, **args) - return model - - -def ShuffleNetV2_x2_0(**args): - model = ShuffleNet(scale=2.0, **args) - return model - - -def ShuffleNetV2_swish(**args): - model = ShuffleNet(scale=1.0, act="swish", **args) - return model - - -def test_ShuffleNetV2_x0_25(): - load_paddle_module_and_check( - ShuffleNetV2_x0_25, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_squeezenet.py b/examples/x2oneflow/paddle2oneflow/models/test_squeezenet.py deleted file mode 100644 index c394611953d2c60605438f39b4b8380b70474c21..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_squeezenet.py +++ /dev/null @@ -1,179 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D - -__all__ = ["SqueezeNet1_0", "SqueezeNet1_1"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class MakeFireConv(nn.Layer): - def __init__( - self, input_channels, output_channels, filter_size, padding=0, name=None - ): - super(MakeFireConv, self).__init__() - self._conv = Conv2D( - input_channels, - output_channels, - filter_size, - padding=padding, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=ParamAttr(name=name + "_offset"), - ) - - def forward(self, x): - x = self._conv(x) - x = F.relu(x) - return x - - -class MakeFire(nn.Layer): - def __init__( - self, - input_channels, - squeeze_channels, - expand1x1_channels, - expand3x3_channels, - name=None, - ): - super(MakeFire, self).__init__() - self._conv = MakeFireConv( - input_channels, squeeze_channels, 1, name=name + "_squeeze1x1" - ) - self._conv_path1 = MakeFireConv( - squeeze_channels, expand1x1_channels, 1, name=name + "_expand1x1" - ) - self._conv_path2 = MakeFireConv( - squeeze_channels, expand3x3_channels, 3, padding=1, name=name + "_expand3x3" - ) - - def forward(self, inputs): - x = self._conv(inputs) - x1 = self._conv_path1(x) - x2 = self._conv_path2(x) - return paddle.concat([x1, x2], axis=1) - - -class SqueezeNet(nn.Layer): - def __init__(self, version, class_dim=1000): - super(SqueezeNet, self).__init__() - self.version = version - - if self.version == "1.0": - self._conv = Conv2D( - 3, - 96, - 7, - stride=2, - weight_attr=ParamAttr(name="conv1_weights"), - bias_attr=ParamAttr(name="conv1_offset"), - ) - self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) - self._conv1 = MakeFire(96, 16, 64, 64, name="fire2") - self._conv2 = MakeFire(128, 16, 64, 64, name="fire3") - self._conv3 = MakeFire(128, 32, 128, 128, name="fire4") - - self._conv4 = MakeFire(256, 32, 128, 128, name="fire5") - self._conv5 = MakeFire(256, 48, 192, 192, name="fire6") - self._conv6 = MakeFire(384, 48, 192, 192, name="fire7") - self._conv7 = MakeFire(384, 64, 256, 256, name="fire8") - - self._conv8 = MakeFire(512, 64, 256, 256, name="fire9") - else: - self._conv = Conv2D( - 3, - 64, - 3, - stride=2, - padding=1, - weight_attr=ParamAttr(name="conv1_weights"), - bias_attr=ParamAttr(name="conv1_offset"), - ) - self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) - self._conv1 = MakeFire(64, 16, 64, 64, name="fire2") - self._conv2 = MakeFire(128, 16, 64, 64, name="fire3") - - self._conv3 = MakeFire(128, 32, 128, 128, name="fire4") - self._conv4 = MakeFire(256, 32, 128, 128, name="fire5") - - self._conv5 = MakeFire(256, 48, 192, 192, name="fire6") - self._conv6 = MakeFire(384, 48, 192, 192, name="fire7") - self._conv7 = MakeFire(384, 64, 256, 256, name="fire8") - self._conv8 = MakeFire(512, 64, 256, 256, name="fire9") - - self._drop = Dropout(p=0.5, mode="downscale_in_infer") - self._conv9 = Conv2D( - 512, - class_dim, - 1, - weight_attr=ParamAttr(name="conv10_weights"), - bias_attr=ParamAttr(name="conv10_offset"), - ) - self._avg_pool = AdaptiveAvgPool2D(1) - - def forward(self, inputs): - x = self._conv(inputs) - x = F.relu(x) - x = self._pool(x) - if self.version == "1.0": - x = self._conv1(x) - x = self._conv2(x) - x = self._conv3(x) - x = self._pool(x) - x = self._conv4(x) - x = self._conv5(x) - x = self._conv6(x) - x = self._conv7(x) - x = self._pool(x) - x = self._conv8(x) 
- else: - x = self._conv1(x) - x = self._conv2(x) - x = self._pool(x) - x = self._conv3(x) - x = self._conv4(x) - x = self._pool(x) - x = self._conv5(x) - x = self._conv6(x) - x = self._conv7(x) - x = self._conv8(x) - x = self._drop(x) - x = self._conv9(x) - x = F.relu(x) - x = self._avg_pool(x) - x = paddle.squeeze(x, axis=[2, 3]) - return x - - -def SqueezeNet1_0(**args): - model = SqueezeNet(version="1.0", **args) - return model - - -def SqueezeNet1_1(**args): - model = SqueezeNet(version="1.1", **args) - return model - - -def test_SqueezeNet1_0(): - load_paddle_module_and_check( - SqueezeNet1_0, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_vggnet.py b/examples/x2oneflow/paddle2oneflow/models/test_vggnet.py deleted file mode 100644 index 1eec4772e038fd475c962b9d792913cd7131ff30..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_vggnet.py +++ /dev/null @@ -1,189 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D - -__all__ = ["VGG11", "VGG13", "VGG16", "VGG19"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBlock(nn.Layer): - def __init__(self, input_channels, output_channels, groups, name=None): - super(ConvBlock, self).__init__() - - self.groups = groups - self._conv_1 = Conv2D( - in_channels=input_channels, - out_channels=output_channels, - kernel_size=3, - stride=1, - padding=1, - weight_attr=ParamAttr(name=name + "1_weights"), - bias_attr=False, - ) - if groups == 2 or groups == 3 or groups == 4: - self._conv_2 = Conv2D( - in_channels=output_channels, - out_channels=output_channels, - kernel_size=3, - stride=1, - padding=1, - weight_attr=ParamAttr(name=name + "2_weights"), - bias_attr=False, - ) - if groups == 3 or groups == 4: - self._conv_3 = Conv2D( - in_channels=output_channels, - out_channels=output_channels, - kernel_size=3, - stride=1, - padding=1, - weight_attr=ParamAttr(name=name + "3_weights"), - bias_attr=False, - ) - if groups == 4: - self._conv_4 = Conv2D( - in_channels=output_channels, - out_channels=output_channels, - kernel_size=3, - stride=1, - padding=1, - weight_attr=ParamAttr(name=name + "4_weights"), - bias_attr=False, - ) - - self._pool = MaxPool2D(kernel_size=2, stride=2, padding=0) - - def forward(self, inputs): - x = self._conv_1(inputs) - x = F.relu(x) - if self.groups == 2 or self.groups == 3 or self.groups == 4: - x = self._conv_2(x) - x = F.relu(x) - if self.groups == 3 or self.groups == 4: - x = self._conv_3(x) - x = F.relu(x) - if self.groups == 4: - x = self._conv_4(x) - x = F.relu(x) - x = self._pool(x) - return x - - -class VGGNet(nn.Layer): - def __init__(self, layers=11, stop_grad_layers=0, 
class_dim=1000): - super(VGGNet, self).__init__() - - self.layers = layers - self.stop_grad_layers = stop_grad_layers - self.vgg_configure = { - 11: [1, 1, 2, 2, 2], - 13: [2, 2, 2, 2, 2], - 16: [2, 2, 3, 3, 3], - 19: [2, 2, 4, 4, 4], - } - assert ( - self.layers in self.vgg_configure.keys() - ), "supported layers are {} but input layer is {}".format( - self.vgg_configure.keys(), layers - ) - self.groups = self.vgg_configure[self.layers] - - self._conv_block_1 = ConvBlock(3, 64, self.groups[0], name="conv1_") - self._conv_block_2 = ConvBlock(64, 128, self.groups[1], name="conv2_") - self._conv_block_3 = ConvBlock(128, 256, self.groups[2], name="conv3_") - self._conv_block_4 = ConvBlock(256, 512, self.groups[3], name="conv4_") - self._conv_block_5 = ConvBlock(512, 512, self.groups[4], name="conv5_") - - for idx, block in enumerate( - [ - self._conv_block_1, - self._conv_block_2, - self._conv_block_3, - self._conv_block_4, - self._conv_block_5, - ] - ): - if self.stop_grad_layers >= idx + 1: - for param in block.parameters(): - param.trainable = False - - self._drop = Dropout(p=0.5, mode="downscale_in_infer") - self._fc1 = Linear( - 7 * 7 * 512, - 4096, - weight_attr=ParamAttr(name="fc6_weights"), - bias_attr=ParamAttr(name="fc6_offset"), - ) - self._fc2 = Linear( - 4096, - 4096, - weight_attr=ParamAttr(name="fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset"), - ) - self._out = Linear( - 4096, - class_dim, - weight_attr=ParamAttr(name="fc8_weights"), - bias_attr=ParamAttr(name="fc8_offset"), - ) - - def forward(self, inputs): - x = self._conv_block_1(inputs) - x = self._conv_block_2(x) - x = self._conv_block_3(x) - x = self._conv_block_4(x) - x = self._conv_block_5(x) - x = paddle.flatten(x, start_axis=1, stop_axis=-1) - x = self._fc1(x) - x = F.relu(x) - x = self._drop(x) - x = self._fc2(x) - x = F.relu(x) - x = self._drop(x) - x = self._out(x) - return x - - -def VGG11(**args): - model = VGGNet(layers=11, **args) - return model - - -def VGG13(**args): - model = VGGNet(layers=13, **args) - return model - - -def VGG16(**args): - model = VGGNet(layers=16, **args) - return model - - -def VGG19(**args): - model = VGGNet(layers=19, **args) - return model - - -def test_VGG16(): - load_paddle_module_and_check( - VGG11, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_vision_transformer.py b/examples/x2oneflow/paddle2oneflow/models/test_vision_transformer.py deleted file mode 100644 index 8c366e69b853e070f19ab83d152a1c774b176da6..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_vision_transformer.py +++ /dev/null @@ -1,444 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import numpy as np -import paddle -import paddle.nn as nn -from paddle.nn.initializer import TruncatedNormal, Constant - -__all__ = [ - "VisionTransformer", - "ViT_small_patch16_224", - "ViT_base_patch16_224", - "ViT_base_patch16_384", - "ViT_base_patch32_384", - "ViT_large_patch16_224", - "ViT_large_patch16_384", - "ViT_large_patch32_384", - "ViT_huge_patch16_224", - "ViT_huge_patch32_384", -] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - -trunc_normal_ = TruncatedNormal(std=0.02) -zeros_ = Constant(value=0.0) -ones_ = Constant(value=1.0) - - -def to_2tuple(x): - return tuple([x] * 2) - - -def drop_path(x, drop_prob=0.0, training=False): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... - See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... - """ - if drop_prob == 0.0 or not training: - return x - keep_prob = paddle.to_tensor(1 - drop_prob) - shape = (paddle.shape(x)[0],) + (1,) * (x.ndim - 1) - random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) - random_tensor = paddle.floor(random_tensor) # binarize - output = x.divide(keep_prob) * random_tensor - return output - - -class DropPath(nn.Layer): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - """ - - def __init__(self, drop_prob=None): - super(DropPath, self).__init__() - self.drop_prob = drop_prob - - def forward(self, x): - return drop_path(x, self.drop_prob, self.training) - - -class Identity(nn.Layer): - def __init__(self): - super(Identity, self).__init__() - - def forward(self, input): - return input - - -class Mlp(nn.Layer): - def __init__( - self, - in_features, - hidden_features=None, - out_features=None, - act_layer=nn.GELU, - drop=0.0, - ): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -class Attention(nn.Layer): - def __init__( - self, - dim, - num_heads=8, - qkv_bias=False, - qk_scale=None, - attn_drop=0.0, - proj_drop=0.0, - ): - super().__init__() - self.num_heads = num_heads - head_dim = dim // num_heads - self.scale = qk_scale or head_dim ** -0.5 - - self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - - def forward(self, x): - # B= paddle.shape(x)[0] - N, C = x.shape[1:] - qkv = ( - self.qkv(x) - .reshape((-1, N, 3, self.num_heads, C // self.num_heads)) - .transpose((2, 0, 3, 1, 4)) - ) - q, k, v = qkv[0], qkv[1], qkv[2] - - attn = (q.matmul(k.transpose((0, 1, 3, 2)))) * self.scale - attn = nn.functional.softmax(attn, axis=-1) - attn = self.attn_drop(attn) - - x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((-1, N, C)) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class Block(nn.Layer): - def __init__( - self, - dim, - num_heads, - mlp_ratio=4.0, - qkv_bias=False, - qk_scale=None, - drop=0.0, - attn_drop=0.0, - drop_path=0.0, - act_layer=nn.GELU, - norm_layer="nn.LayerNorm", - epsilon=1e-5, - ): - super().__init__() - self.norm1 = eval(norm_layer)(dim, 
epsilon=epsilon) - self.attn = Attention( - dim, - num_heads=num_heads, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=drop, - ) - # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here - self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity() - self.norm2 = eval(norm_layer)(dim, epsilon=epsilon) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp( - in_features=dim, - hidden_features=mlp_hidden_dim, - act_layer=act_layer, - drop=drop, - ) - - def forward(self, x): - x = x + self.drop_path(self.attn(self.norm1(x))) - x = x + self.drop_path(self.mlp(self.norm2(x))) - return x - - -class PatchEmbed(nn.Layer): - """ Image to Patch Embedding - """ - - def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): - super().__init__() - img_size = to_2tuple(img_size) - patch_size = to_2tuple(patch_size) - num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) - self.img_size = img_size - self.patch_size = patch_size - self.num_patches = num_patches - - self.proj = nn.Conv2D( - in_chans, embed_dim, kernel_size=patch_size, stride=patch_size - ) - - def forward(self, x): - B, C, H, W = x.shape - assert ( - H == self.img_size[0] and W == self.img_size[1] - ), "Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." - - x = self.proj(x).flatten(2).transpose((0, 2, 1)) - return x - - -class VisionTransformer(nn.Layer): - """ Vision Transformer with support for patch input - """ - - def __init__( - self, - img_size=224, - patch_size=16, - in_chans=3, - class_dim=1000, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - qkv_bias=False, - qk_scale=None, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.0, - norm_layer="nn.LayerNorm", - epsilon=1e-5, - **args - ): - super().__init__() - self.class_dim = class_dim - - self.num_features = self.embed_dim = embed_dim - - self.patch_embed = PatchEmbed( - img_size=img_size, - patch_size=patch_size, - in_chans=in_chans, - embed_dim=embed_dim, - ) - num_patches = self.patch_embed.num_patches - - self.pos_embed = self.create_parameter( - shape=(1, num_patches + 1, embed_dim), default_initializer=zeros_ - ) - self.add_parameter("pos_embed", self.pos_embed) - self.cls_token = self.create_parameter( - shape=(1, 1, embed_dim), default_initializer=zeros_ - ) - self.add_parameter("cls_token", self.cls_token) - self.pos_drop = nn.Dropout(p=drop_rate) - - dpr = np.linspace(0, drop_path_rate, depth) - - self.blocks = nn.LayerList( - [ - Block( - dim=embed_dim, - num_heads=num_heads, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - attn_drop=attn_drop_rate, - drop_path=dpr[i], - norm_layer=norm_layer, - epsilon=epsilon, - ) - for i in range(depth) - ] - ) - - self.norm = eval(norm_layer)(embed_dim, epsilon=epsilon) - - # Classifier head - self.head = nn.Linear(embed_dim, class_dim) if class_dim > 0 else Identity() - - # TODO(littletomatodonkey): same init in static mode - if paddle.in_dynamic_mode(): - trunc_normal_(self.pos_embed) - trunc_normal_(self.cls_token) - self.apply(self._init_weights) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight) - if isinstance(m, nn.Linear) and m.bias is not None: - zeros_(m.bias) - elif isinstance(m, nn.LayerNorm): - zeros_(m.bias) - ones_(m.weight) - - def forward_features(self, x): - # B = x.shape[0] - B = paddle.shape(x)[0] - x = self.patch_embed(x) - cls_tokens = self.cls_token.expand((B, -1, 
-1)) - x = paddle.concat((cls_tokens, x), axis=1) - x = x + self.pos_embed - x = self.pos_drop(x) - for blk in self.blocks: - x = blk(x) - x = self.norm(x) - return x[:, 0] - - def forward(self, x): - x = self.forward_features(x) - x = self.head(x) - return x - - -def ViT_small_patch16_224(**kwargs): - model = VisionTransformer( - patch_size=16, - embed_dim=768, - depth=8, - num_heads=8, - mlp_ratio=3, - qk_scale=768 ** -0.5, - **kwargs - ) - return model - - -def ViT_base_patch16_224(**kwargs): - model = VisionTransformer( - patch_size=16, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_base_patch16_384(**kwargs): - model = VisionTransformer( - img_size=384, - patch_size=16, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_base_patch32_384(**kwargs): - model = VisionTransformer( - img_size=384, - patch_size=32, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_large_patch16_224(**kwargs): - model = VisionTransformer( - patch_size=16, - embed_dim=1024, - depth=24, - num_heads=16, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_large_patch16_384(**kwargs): - model = VisionTransformer( - img_size=384, - patch_size=16, - embed_dim=1024, - depth=24, - num_heads=16, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_large_patch32_384(**kwargs): - model = VisionTransformer( - img_size=384, - patch_size=32, - embed_dim=1024, - depth=24, - num_heads=16, - mlp_ratio=4, - qkv_bias=True, - epsilon=1e-6, - **kwargs - ) - return model - - -def ViT_huge_patch16_224(**kwargs): - model = VisionTransformer( - patch_size=16, embed_dim=1280, depth=32, num_heads=16, mlp_ratio=4, **kwargs - ) - return model - - -def ViT_huge_patch32_384(**kwargs): - model = VisionTransformer( - img_size=384, - patch_size=32, - embed_dim=1280, - depth=32, - num_heads=16, - mlp_ratio=4, - **kwargs - ) - return model - - -def test_ViT_small_patch16_224(): - load_paddle_module_and_check( - ViT_small_patch16_224, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_xception.py b/examples/x2oneflow/paddle2oneflow/models/test_xception.py deleted file mode 100644 index 78176748db92e8b4adf22bf6b9777c98f16f288b..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_xception.py +++ /dev/null @@ -1,348 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform -import math -import sys - -__all__ = ["Xception41", "Xception65", "Xception71"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - ) - bn_name = "bn_" + name - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - ) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class SeparableConv(nn.Layer): - def __init__(self, input_channels, output_channels, stride=1, name=None): - super(SeparableConv, self).__init__() - - self._pointwise_conv = ConvBNLayer( - input_channels, output_channels, 1, name=name + "_sep" - ) - self._depthwise_conv = ConvBNLayer( - output_channels, - output_channels, - 3, - stride=stride, - groups=output_channels, - name=name + "_dw", - ) - - def forward(self, inputs): - x = self._pointwise_conv(inputs) - x = self._depthwise_conv(x) - return x - - -class EntryFlowBottleneckBlock(nn.Layer): - def __init__( - self, input_channels, output_channels, stride=2, name=None, relu_first=False - ): - super(EntryFlowBottleneckBlock, self).__init__() - self.relu_first = relu_first - - self._short = Conv2D( - in_channels=input_channels, - out_channels=output_channels, - kernel_size=1, - stride=stride, - padding=0, - weight_attr=ParamAttr(name + "_branch1_weights"), - bias_attr=False, - ) - self._conv1 = SeparableConv( - input_channels, output_channels, stride=1, name=name + "_branch2a_weights" - ) - self._conv2 = SeparableConv( - output_channels, output_channels, stride=1, name=name + "_branch2b_weights" - ) - self._pool = MaxPool2D(kernel_size=3, stride=stride, padding=1) - - def forward(self, inputs): - conv0 = inputs - short = self._short(inputs) - if self.relu_first: - conv0 = F.relu(conv0) - conv1 = self._conv1(conv0) - conv2 = F.relu(conv1) - conv2 = self._conv2(conv2) - pool = self._pool(conv2) - return paddle.add(x=short, y=pool) - - -class EntryFlow(nn.Layer): - def __init__(self, block_num=3): - super(EntryFlow, self).__init__() - - name = "entry_flow" - self.block_num = block_num - self._conv1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name=name + "_conv1") - self._conv2 = ConvBNLayer(32, 64, 3, act="relu", name=name + "_conv2") - if block_num == 3: - self._conv_0 = EntryFlowBottleneckBlock( - 64, 128, stride=2, name=name + "_0", relu_first=False - ) - self._conv_1 = EntryFlowBottleneckBlock( - 128, 256, stride=2, name=name + "_1", relu_first=True - ) - self._conv_2 = EntryFlowBottleneckBlock( - 256, 728, stride=2, name=name + "_2", relu_first=True - ) - elif block_num == 5: - self._conv_0 = EntryFlowBottleneckBlock( - 64, 128, stride=2, name=name + "_0", relu_first=False - ) - self._conv_1 = EntryFlowBottleneckBlock( - 
128, 256, stride=1, name=name + "_1", relu_first=True - ) - self._conv_2 = EntryFlowBottleneckBlock( - 256, 256, stride=2, name=name + "_2", relu_first=True - ) - self._conv_3 = EntryFlowBottleneckBlock( - 256, 728, stride=1, name=name + "_3", relu_first=True - ) - self._conv_4 = EntryFlowBottleneckBlock( - 728, 728, stride=2, name=name + "_4", relu_first=True - ) - else: - sys.exit(-1) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._conv2(x) - - if self.block_num == 3: - x = self._conv_0(x) - x = self._conv_1(x) - x = self._conv_2(x) - elif self.block_num == 5: - x = self._conv_0(x) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._conv_3(x) - x = self._conv_4(x) - return x - - -class MiddleFlowBottleneckBlock(nn.Layer): - def __init__(self, input_channels, output_channels, name): - super(MiddleFlowBottleneckBlock, self).__init__() - - self._conv_0 = SeparableConv( - input_channels, output_channels, stride=1, name=name + "_branch2a_weights" - ) - self._conv_1 = SeparableConv( - output_channels, output_channels, stride=1, name=name + "_branch2b_weights" - ) - self._conv_2 = SeparableConv( - output_channels, output_channels, stride=1, name=name + "_branch2c_weights" - ) - - def forward(self, inputs): - conv0 = F.relu(inputs) - conv0 = self._conv_0(conv0) - conv1 = F.relu(conv0) - conv1 = self._conv_1(conv1) - conv2 = F.relu(conv1) - conv2 = self._conv_2(conv2) - return paddle.add(x=inputs, y=conv2) - - -class MiddleFlow(nn.Layer): - def __init__(self, block_num=8): - super(MiddleFlow, self).__init__() - - self.block_num = block_num - self._conv_0 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_0") - self._conv_1 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_1") - self._conv_2 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_2") - self._conv_3 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_3") - self._conv_4 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_4") - self._conv_5 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_5") - self._conv_6 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_6") - self._conv_7 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_7") - if block_num == 16: - self._conv_8 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_8") - self._conv_9 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_9") - self._conv_10 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_10") - self._conv_11 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_11") - self._conv_12 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_12") - self._conv_13 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_13") - self._conv_14 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_14") - self._conv_15 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_15") - - def forward(self, inputs): - x = self._conv_0(inputs) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._conv_3(x) - x = self._conv_4(x) - x = self._conv_5(x) - x = self._conv_6(x) - x = self._conv_7(x) - if self.block_num == 16: - x = self._conv_8(x) - x = self._conv_9(x) - x = self._conv_10(x) - x = self._conv_11(x) - x = self._conv_12(x) - x = self._conv_13(x) - x = self._conv_14(x) - x = self._conv_15(x) - return x - - -class ExitFlowBottleneckBlock(nn.Layer): - def __init__(self, input_channels, output_channels1, output_channels2, name): - super(ExitFlowBottleneckBlock, self).__init__() - - self._short = Conv2D( - in_channels=input_channels, - out_channels=output_channels2, - kernel_size=1, - stride=2, - 
padding=0, - weight_attr=ParamAttr(name + "_branch1_weights"), - bias_attr=False, - ) - self._conv_1 = SeparableConv( - input_channels, output_channels1, stride=1, name=name + "_branch2a_weights" - ) - self._conv_2 = SeparableConv( - output_channels1, - output_channels2, - stride=1, - name=name + "_branch2b_weights", - ) - self._pool = MaxPool2D(kernel_size=3, stride=2, padding=1) - - def forward(self, inputs): - short = self._short(inputs) - conv0 = F.relu(inputs) - conv1 = self._conv_1(conv0) - conv2 = F.relu(conv1) - conv2 = self._conv_2(conv2) - pool = self._pool(conv2) - return paddle.add(x=short, y=pool) - - -class ExitFlow(nn.Layer): - def __init__(self, class_dim): - super(ExitFlow, self).__init__() - - name = "exit_flow" - - self._conv_0 = ExitFlowBottleneckBlock(728, 728, 1024, name=name + "_1") - self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2") - self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3") - self._pool = AdaptiveAvgPool2D(1) - stdv = 1.0 / math.sqrt(2048 * 1.0) - self._out = Linear( - 2048, - class_dim, - weight_attr=ParamAttr(name="fc_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc_offset"), - ) - - def forward(self, inputs): - conv0 = self._conv_0(inputs) - conv1 = self._conv_1(conv0) - conv1 = F.relu(conv1) - conv2 = self._conv_2(conv1) - conv2 = F.relu(conv2) - pool = self._pool(conv2) - pool = paddle.flatten(pool, start_axis=1, stop_axis=-1) - out = self._out(pool) - return out - - -class Xception(nn.Layer): - def __init__(self, entry_flow_block_num=3, middle_flow_block_num=8, class_dim=1000): - super(Xception, self).__init__() - self.entry_flow_block_num = entry_flow_block_num - self.middle_flow_block_num = middle_flow_block_num - self._entry_flow = EntryFlow(entry_flow_block_num) - self._middle_flow = MiddleFlow(middle_flow_block_num) - self._exit_flow = ExitFlow(class_dim) - - def forward(self, inputs): - x = self._entry_flow(inputs) - x = self._middle_flow(x) - x = self._exit_flow(x) - return x - - -def Xception41(**args): - model = Xception(entry_flow_block_num=3, middle_flow_block_num=8, **args) - return model - - -def Xception65(**args): - model = Xception(entry_flow_block_num=3, middle_flow_block_num=16, **args) - return model - - -def Xception71(**args): - model = Xception(entry_flow_block_num=5, middle_flow_block_num=16, **args) - return model - - -def test_Xception41(): - load_paddle_module_and_check( - Xception41, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/models/test_xception_deeplab.py b/examples/x2oneflow/paddle2oneflow/models/test_xception_deeplab.py deleted file mode 100644 index 5ab4cfc7cb47e4e182755ddb147b81a96b02c23d..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/models/test_xception_deeplab.py +++ /dev/null @@ -1,448 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D - -__all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"] - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def check_data(data, number): - if type(data) == int: - return [data] * number - assert len(data) == number - return data - - -def check_stride(s, os): - if s <= os: - return True - else: - return False - - -def check_points(count, points): - if points is None: - return False - else: - if isinstance(points, list): - return True if count in points else False - else: - return True if count == points else False - - -def gen_bottleneck_params(backbone="xception_65"): - if backbone == "xception_65": - bottleneck_params = { - "entry_flow": (3, [2, 2, 2], [128, 256, 728]), - "middle_flow": (16, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]), - } - elif backbone == "xception_41": - bottleneck_params = { - "entry_flow": (3, [2, 2, 2], [128, 256, 728]), - "middle_flow": (8, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]), - } - elif backbone == "xception_71": - bottleneck_params = { - "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]), - "middle_flow": (16, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]), - } - else: - raise Exception( - "xception backbont only support xception_41/xception_65/xception_71" - ) - return bottleneck_params - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - filter_size, - stride=1, - padding=0, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - in_channels=input_channels, - out_channels=output_channels, - kernel_size=filter_size, - stride=stride, - padding=padding, - weight_attr=ParamAttr(name=name + "/weights"), - bias_attr=False, - ) - self._bn = BatchNorm( - num_channels=output_channels, - act=act, - epsilon=1e-3, - momentum=0.99, - param_attr=ParamAttr(name=name + "/BatchNorm/gamma"), - bias_attr=ParamAttr(name=name + "/BatchNorm/beta"), - moving_mean_name=name + "/BatchNorm/moving_mean", - moving_variance_name=name + "/BatchNorm/moving_variance", - ) - - def forward(self, inputs): - return self._bn(self._conv(inputs)) - - -class Seperate_Conv(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - stride, - filter, - dilation=1, - act=None, - name=None, - ): - super(Seperate_Conv, self).__init__() - - self._conv1 = Conv2D( - in_channels=input_channels, - out_channels=input_channels, - kernel_size=filter, - stride=stride, - groups=input_channels, - padding=(filter) // 2 * dilation, - dilation=dilation, - weight_attr=ParamAttr(name=name + "/depthwise/weights"), - bias_attr=False, - ) - self._bn1 = BatchNorm( - input_channels, - act=act, - epsilon=1e-3, - momentum=0.99, - param_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"), - bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta"), - moving_mean_name=name + "/depthwise/BatchNorm/moving_mean", - moving_variance_name=name + "/depthwise/BatchNorm/moving_variance", - ) - self._conv2 = Conv2D( - input_channels, - output_channels, - 1, - stride=1, - groups=1, - padding=0, - weight_attr=ParamAttr(name=name + "/pointwise/weights"), - bias_attr=False, - ) - self._bn2 = BatchNorm( - output_channels, - act=act, - epsilon=1e-3, - 
momentum=0.99, - param_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"), - bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta"), - moving_mean_name=name + "/pointwise/BatchNorm/moving_mean", - moving_variance_name=name + "/pointwise/BatchNorm/moving_variance", - ) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._bn1(x) - x = self._conv2(x) - x = self._bn2(x) - return x - - -class Xception_Block(nn.Layer): - def __init__( - self, - input_channels, - output_channels, - strides=1, - filter_size=3, - dilation=1, - skip_conv=True, - has_skip=True, - activation_fn_in_separable_conv=False, - name=None, - ): - super(Xception_Block, self).__init__() - - repeat_number = 3 - output_channels = check_data(output_channels, repeat_number) - filter_size = check_data(filter_size, repeat_number) - strides = check_data(strides, repeat_number) - - self.has_skip = has_skip - self.skip_conv = skip_conv - self.activation_fn_in_separable_conv = activation_fn_in_separable_conv - if not activation_fn_in_separable_conv: - self._conv1 = Seperate_Conv( - input_channels, - output_channels[0], - stride=strides[0], - filter=filter_size[0], - dilation=dilation, - name=name + "/separable_conv1", - ) - self._conv2 = Seperate_Conv( - output_channels[0], - output_channels[1], - stride=strides[1], - filter=filter_size[1], - dilation=dilation, - name=name + "/separable_conv2", - ) - self._conv3 = Seperate_Conv( - output_channels[1], - output_channels[2], - stride=strides[2], - filter=filter_size[2], - dilation=dilation, - name=name + "/separable_conv3", - ) - else: - self._conv1 = Seperate_Conv( - input_channels, - output_channels[0], - stride=strides[0], - filter=filter_size[0], - act="relu", - dilation=dilation, - name=name + "/separable_conv1", - ) - self._conv2 = Seperate_Conv( - output_channels[0], - output_channels[1], - stride=strides[1], - filter=filter_size[1], - act="relu", - dilation=dilation, - name=name + "/separable_conv2", - ) - self._conv3 = Seperate_Conv( - output_channels[1], - output_channels[2], - stride=strides[2], - filter=filter_size[2], - act="relu", - dilation=dilation, - name=name + "/separable_conv3", - ) - - if has_skip and skip_conv: - self._short = ConvBNLayer( - input_channels, - output_channels[-1], - 1, - stride=strides[-1], - padding=0, - name=name + "/shortcut", - ) - - def forward(self, inputs): - if not self.activation_fn_in_separable_conv: - x = F.relu(inputs) - x = self._conv1(x) - x = F.relu(x) - x = self._conv2(x) - x = F.relu(x) - x = self._conv3(x) - else: - x = self._conv1(inputs) - x = self._conv2(x) - x = self._conv3(x) - if self.has_skip: - if self.skip_conv: - skip = self._short(inputs) - else: - skip = inputs - return paddle.add(x, skip) - else: - return x - - -class XceptionDeeplab(nn.Layer): - def __init__(self, backbone, class_dim=1000): - super(XceptionDeeplab, self).__init__() - - bottleneck_params = gen_bottleneck_params(backbone) - self.backbone = backbone - - self._conv1 = ConvBNLayer( - 3, - 32, - 3, - stride=2, - padding=1, - act="relu", - name=self.backbone + "/entry_flow/conv1", - ) - self._conv2 = ConvBNLayer( - 32, - 64, - 3, - stride=1, - padding=1, - act="relu", - name=self.backbone + "/entry_flow/conv2", - ) - - self.block_num = bottleneck_params["entry_flow"][0] - self.strides = bottleneck_params["entry_flow"][1] - self.chns = bottleneck_params["entry_flow"][2] - self.strides = check_data(self.strides, self.block_num) - self.chns = check_data(self.chns, self.block_num) - - self.entry_flow = [] - self.middle_flow = [] - - 
self.stride = 2 - self.output_stride = 32 - s = self.stride - - for i in range(self.block_num): - stride = ( - self.strides[i] - if check_stride(s * self.strides[i], self.output_stride) - else 1 - ) - xception_block = self.add_sublayer( - self.backbone + "/entry_flow/block" + str(i + 1), - Xception_Block( - input_channels=64 if i == 0 else self.chns[i - 1], - output_channels=self.chns[i], - strides=[1, 1, stride], - name=self.backbone + "/entry_flow/block" + str(i + 1), - ), - ) - self.entry_flow.append(xception_block) - s = s * stride - self.stride = s - - self.block_num = bottleneck_params["middle_flow"][0] - self.strides = bottleneck_params["middle_flow"][1] - self.chns = bottleneck_params["middle_flow"][2] - self.strides = check_data(self.strides, self.block_num) - self.chns = check_data(self.chns, self.block_num) - s = self.stride - - for i in range(self.block_num): - stride = ( - self.strides[i] - if check_stride(s * self.strides[i], self.output_stride) - else 1 - ) - xception_block = self.add_sublayer( - self.backbone + "/middle_flow/block" + str(i + 1), - Xception_Block( - input_channels=728, - output_channels=728, - strides=[1, 1, self.strides[i]], - skip_conv=False, - name=self.backbone + "/middle_flow/block" + str(i + 1), - ), - ) - self.middle_flow.append(xception_block) - s = s * stride - self.stride = s - - self.block_num = bottleneck_params["exit_flow"][0] - self.strides = bottleneck_params["exit_flow"][1] - self.chns = bottleneck_params["exit_flow"][2] - self.strides = check_data(self.strides, self.block_num) - self.chns = check_data(self.chns, self.block_num) - s = self.stride - stride = ( - self.strides[0] - if check_stride(s * self.strides[0], self.output_stride) - else 1 - ) - self._exit_flow_1 = Xception_Block( - 728, self.chns[0], [1, 1, stride], name=self.backbone + "/exit_flow/block1" - ) - s = s * stride - stride = ( - self.strides[1] - if check_stride(s * self.strides[1], self.output_stride) - else 1 - ) - self._exit_flow_2 = Xception_Block( - self.chns[0][-1], - self.chns[1], - [1, 1, stride], - dilation=2, - has_skip=False, - activation_fn_in_separable_conv=True, - name=self.backbone + "/exit_flow/block2", - ) - s = s * stride - - self.stride = s - - self._drop = Dropout(p=0.5, mode="downscale_in_infer") - self._pool = AdaptiveAvgPool2D(1) - self._fc = Linear( - self.chns[1][-1], - class_dim, - weight_attr=ParamAttr(name="fc_weights"), - bias_attr=ParamAttr(name="fc_bias"), - ) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._conv2(x) - for ef in self.entry_flow: - x = ef(x) - for mf in self.middle_flow: - x = mf(x) - x = self._exit_flow_1(x) - x = self._exit_flow_2(x) - x = self._drop(x) - x = self._pool(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x - - -def Xception41_deeplab(**args): - model = XceptionDeeplab("xception_41", **args) - return model - - -def Xception65_deeplab(**args): - model = XceptionDeeplab("xception_65", **args) - return model - - -def Xception71_deeplab(**args): - model = XceptionDeeplab("xception_71", **args) - return model - - -def test_Xception41_deeplab(): - load_paddle_module_and_check( - Xception41_deeplab, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_activations.py b/examples/x2oneflow/paddle2oneflow/nodes/test_activations.py deleted file mode 100644 index b3e6b6bf5e541a2fd6fb0aa23888c362efcd1b22..0000000000000000000000000000000000000000 --- 
a/examples/x2oneflow/paddle2oneflow/nodes/test_activations.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def test_relu(): - class Net(nn.Layer): - def forward(self, x): - x = F.relu(x) - return x - - load_paddle_module_and_check(Net) diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_array.py b/examples/x2oneflow/paddle2oneflow/nodes/test_array.py deleted file mode 100644 index 91514fd3f111eec3f0986919017705e49547b0c0..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/nodes/test_array.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def test_concat(): - class Net(nn.Layer): - def forward(self, x): - y = x * 3 - return paddle.concat((x, y)) - - load_paddle_module_and_check(Net) - - -def test_concat_with_axis(): - class Net(nn.Layer): - def forward(self, x): - y = x * 3 - return paddle.concat((x, y), axis=1) - - load_paddle_module_and_check(Net) - - -def test_unsqueeze(): - class Net(nn.Layer): - def forward(self, x): - return paddle.unsqueeze(x, 2) - - load_paddle_module_and_check(Net) - - -def test_transpose(): - class Net(nn.Layer): - def forward(self, x): - # shape = x.shape - return paddle.transpose(x, perm=(0, 3, 1, 2)) - - load_paddle_module_and_check(Net) - - -def test_gather(): - class Net(nn.Layer): - def forward(self, x): - return x[1] - - load_paddle_module_and_check(Net) - - -def test_tensor_index(): - class Net(nn.Layer): - def forward(self, x): - return x[0, 1:3, :1, 2:4] - - load_paddle_module_and_check(Net) - -def test_split(): - class Net(nn.Layer): - def forward(self, x): - x1, y1 = paddle.split(x, num_or_sections=[3, 3], axis=1) - return x1 - load_paddle_module_and_check(Net, input_size=(1, 6, 3, 3)) diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_batch_norm.py b/examples/x2oneflow/paddle2oneflow/nodes/test_batch_norm.py deleted file mode 100644 index 6f1f5c63a129f65ca3ff8f8a9bd6e8b8e2c7e4f4..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/nodes/test_batch_norm.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. 
All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def test_bn(): - class Net(nn.Layer): - def __init__(self): - super(Net, self).__init__() - self.bn = nn.BatchNorm2D(4) - - def forward(self, x): - x = self.bn(x) - return x - - load_paddle_module_and_check(Net) diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_clip.py b/examples/x2oneflow/paddle2oneflow/nodes/test_clip.py deleted file mode 100644 index 80209cc543eb9c1f6da025fc4de9069d904d22d0..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/nodes/test_clip.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def test_clip_min_max(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.clip(x, min=-0.5, max=3.1) - return x - - load_paddle_module_and_check(Net) - - -def test_clip_min(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.clip(x, min=-2.2) - return x - - load_paddle_module_and_check(Net) - - -def test_clip_max(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.clip(x, max=1.2) - return x - - load_paddle_module_and_check(Net) diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_conv.py b/examples/x2oneflow/paddle2oneflow/nodes/test_conv.py deleted file mode 100644 index af4f99ed6f0140026fb8a15841c14d7854d93c51..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/nodes/test_conv.py +++ /dev/null @@ -1,85 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def test_conv2d_k3s1p1(): - class Net(nn.Layer): - def __init__(self): - super(Net, self).__init__() - self.conv = nn.Conv2D(4, 5, 3, padding=1) - - def forward(self, x): - x = self.conv(x) - return x - - load_paddle_module_and_check(Net, input_size=(2, 4, 3, 5)) - - -def test_conv2d_k3s1p0(): - class Net(nn.Layer): - def __init__(self): - super(Net, self).__init__() - self.conv = nn.Conv2D(4, 5, 3, padding=0) - - def forward(self, x): - x = self.conv(x) - return x - - load_paddle_module_and_check(Net, input_size=(2, 4, 3, 5)) - - -def test_conv2d_k3s2p0(): - class Net(nn.Layer): - def __init__(self): - super(Net, self).__init__() - self.conv = nn.Conv2D(4, 5, 3, stride=2, padding=0) - - def forward(self, x): - x = self.conv(x) - return x - - load_paddle_module_and_check(Net, input_size=(2, 4, 9, 7)) - - -def test_conv2d_k3s2p0g2(): - class Net(nn.Layer): - def __init__(self): - super(Net, self).__init__() - self.conv = nn.Conv2D(4, 6, 3, stride=1, padding=1, groups=2) - - def forward(self, x): - x = self.conv(x) - return x - - load_paddle_module_and_check(Net, input_size=(2, 4, 9, 7)) - - -def test_conv2d_k3s2p0g2d2(): - class Net(nn.Layer): - def __init__(self): - super(Net, self).__init__() - self.conv = nn.Conv2D(4, 6, 3, stride=1, padding=1, groups=2, dilation=2) - - def forward(self, x): - x = self.conv(x) - return x - - load_paddle_module_and_check(Net, input_size=(2, 4, 13, 12)) diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_flatten.py b/examples/x2oneflow/paddle2oneflow/nodes/test_flatten.py deleted file mode 100644 index 2a7438d9061196acc5357f3e8622b62b0f659047..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/nodes/test_flatten.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def test_flatten(): - class Net(nn.Layer): - def forward(self, x): - flatten = nn.Flatten() - x = flatten(x) - return x - - load_paddle_module_and_check(Net) - -def test_flatten_v2(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.flatten(x, start_axis=1, stop_axis=-1) - return x - - load_paddle_module_and_check(Net) - -def test_flatten_axis(): - class Net(nn.Layer): - def forward(self, x): - flatten = nn.Flatten() - x = paddle.flatten(x, start_axis=1, stop_axis=-1, name="flatten") - return x - - load_paddle_module_and_check(Net) diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_math.py b/examples/x2oneflow/paddle2oneflow/nodes/test_math.py deleted file mode 100644 index cc510ab21a52265d95e9b07e1b63292b78c479eb..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/nodes/test_math.py +++ /dev/null @@ -1,298 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -import numpy as np -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def test_add(): - class Net(nn.Layer): - def forward(self, x): - x += x - return x - - load_paddle_module_and_check(Net) - - -def test_sub(): - class Net(nn.Layer): - def forward(self, x): - x -= 2 - return x - - load_paddle_module_and_check(Net) - - -def test_mul(): - class Net(nn.Layer): - def forward(self, x): - x *= x - return x - - load_paddle_module_and_check(Net) - - -def test_div(): - class Net(nn.Layer): - def forward(self, x): - x /= 3 - return x - - load_paddle_module_and_check(Net) - - -def test_sqrt(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.sqrt(x) - return x - - load_paddle_module_and_check(Net, input_min_val=0) - - -def test_pow(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.pow(x, 3) - return x - - load_paddle_module_and_check(Net) - - -def test_tanh(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.tanh(x) - return x - - load_paddle_module_and_check(Net) - - -def test_sigmoid(): - class Net(nn.Layer): - def forward(self, x): - m = nn.Sigmoid() - x = m(x) - return x - - load_paddle_module_and_check(Net) - - -# def test_erf(): -# class Net(nn.Layer): -# def forward(self, x): -# x = paddle.erf(x) -# return x - -# load_paddle_module_and_check(Net) - - -def test_clip(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.clip(x, min=-1, max=2) - return x - - load_paddle_module_and_check(Net) - - -# def test_cast(): -# class Net(nn.Layer): -# def forward(self, x): -# x = paddle.cast(x, 'float64') -# return x - -# load_paddle_module_and_check(Net) - -def test_abs(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.abs(x) - return x - - load_paddle_module_and_check(Net) - - -def test_add_v2(): - class Net(nn.Layer): - def forward(self, x): - x = 
paddle.add(x, x) - return x - - load_paddle_module_and_check(Net) - -def test_argmax(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.argmax(x, -1) - return x - - load_paddle_module_and_check(Net) - -def test_bmm(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.bmm(x, x) - return x - - load_paddle_module_and_check(Net, input_size=(3, 2, 2)) - -def test_exp(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.exp(x) - return x - - load_paddle_module_and_check(Net) - -def test_floor(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.floor(x) - return x - - load_paddle_module_and_check(Net) - -def test_hard_sigmoid(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.nn.functional.hardsigmoid(x) - return x - - load_paddle_module_and_check(Net) - -def test_hard_swish(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.nn.functional.hardswish(x) - return x - - load_paddle_module_and_check(Net) - -def test_leaky_relu(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.nn.functional.leaky_relu(x) - return x - - load_paddle_module_and_check(Net) - -def test_log(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.log(x) - return x - - load_paddle_module_and_check(Net) - -def test_matmul(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.matmul(x, x) - return x - - load_paddle_module_and_check(Net, input_size=(3, 3)) - -def test_mean(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.mean(x, axis=-1) - return x - - load_paddle_module_and_check(Net) - -def test_prod(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.prod(x) - return x - - load_paddle_module_and_check(Net, input_size=(3, )) - -def test_scale(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.scale(x, scale=2.0, bias=1.0) - return x - - load_paddle_module_and_check(Net) - -def test_squeeze(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.squeeze(x, axis=1) - return x - - load_paddle_module_and_check(Net, input_size=(5, 1, 10)) - -def test_square(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.square(x) - return x - - load_paddle_module_and_check(Net) - -def test_stack(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.stack([x, x], axis=0) - return x - - load_paddle_module_and_check(Net) - -def test_stride_slice(): - class Net(nn.Layer): - def forward(self, x): - axes = [1, 2, 3] - starts = [-3, 0, 2] - ends = [3, 2, 4] - strides_1 = [1, 1, 1] - strides_2 = [1, 1, 2] - x = paddle.strided_slice(x, axes=axes, starts=starts, ends=ends, strides=strides_1) - return x - - load_paddle_module_and_check(Net, input_size=(3, 4, 5, 6)) - -def test_swish(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.nn.functional.swish(x) - return x - - load_paddle_module_and_check(Net) - - - diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_pad.py b/examples/x2oneflow/paddle2oneflow/nodes/test_pad.py deleted file mode 100644 index 99d61a8e602f4ced5088c6b2592446eb2295d26d..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/nodes/test_pad.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - -# from absl import app -# from absl.testing import absltest - - -def test_pad(): - class Net(nn.Layer): - def forward(self, x): - my_pad = nn.Pad2D(padding=[2, 2, 3, 3], mode="constant") - x = my_pad(x) - return x - - load_paddle_module_and_check(Net) - - -def test_pad_with_value(): - class Net(nn.Layer): - def forward(self, x): - my_pad = nn.Pad2D(padding=[2, 2, 3, 3], mode="constant", value=3.5) - x = my_pad(x) - return x - - load_paddle_module_and_check(Net) diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_pooling.py b/examples/x2oneflow/paddle2oneflow/nodes/test_pooling.py deleted file mode 100644 index 60551d7683fab7a350b774b9a7079f4fd3961640..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/nodes/test_pooling.py +++ /dev/null @@ -1,131 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def _test_k3s1p1(pt_pool): - class Net(nn.Layer): - def __init__(self): - super(Net, self).__init__() - self.pool = pt_pool(kernel_size=3, stride=1, padding=1) - - def forward(self, x): - x = self.pool(x) - return x - - load_paddle_module_and_check(Net, input_size=(2, 4, 3, 5)) - - -def test_maxpool_k3s1p1(): - _test_k3s1p1(nn.MaxPool2D) - - -def test_avgpool_k3s1p1(): - _test_k3s1p1(nn.AvgPool2D) - - -def _test_k4s2p2(pt_pool): - class Net(nn.Layer): - def __init__(self): - super(Net, self).__init__() - self.pool = pt_pool(kernel_size=4, stride=2, padding=2) - - def forward(self, x): - x = self.pool(x) - return x - - load_paddle_module_and_check(Net, input_size=(2, 4, 10, 9)) - - -def test_maxpool_k4s2p2(): - _test_k4s2p2(nn.MaxPool2D) - - -def test_avgpool_k4s2p3(): - _test_k4s2p2(nn.AvgPool2D) - - -def _test_k43s2p1(pt_pool): - class Net(nn.Layer): - def __init__(self): - super(Net, self).__init__() - self.pool = pt_pool(kernel_size=(4, 3), stride=2, padding=1) - - def forward(self, x): - x = self.pool(x) - return x - - load_paddle_module_and_check(Net, input_size=(2, 4, 10, 9)) - - -def test_maxpool_k43s2p1(): - _test_k43s2p1(nn.MaxPool2D) - - -def test_avgpool_k43s2p1(): - _test_k43s2p1(nn.AvgPool2D) - - -def _test_k43s2p21(pt_pool): - class Net(nn.Layer): - def __init__(self): - super(Net, self).__init__() - self.pool = pt_pool((4, 3), stride=2, padding=(2, 1)) - - def forward(self, x): - x = self.pool(x) - return x - - load_paddle_module_and_check(Net, input_size=(2, 4, 10, 9)) - - -def test_maxpool_k43s2p21(): - _test_k43s2p21(nn.MaxPool2D) - - -def test_avgpool_k43s2p21(): - _test_k43s2p21(nn.AvgPool2D) - - -def _test_global_pooling(pt_pool): - class Net(nn.Layer): - def __init__(self): - super(Net, self).__init__() - self.pool = pt_pool((1, 1)) - - def forward(self, x): - x = self.pool(x) - return x - - load_paddle_module_and_check(Net, input_size=(2, 4, 10, 9)) - - -def test_global_avg_pooling(): - _test_global_pooling(nn.AdaptiveAvgPool2D) - - -# def test_global_max_pooling(): -# _test_global_pooling(nn.AdaptiveMaxPool2D) - -# from absl import app -# from absl.testing import absltest - -# = absltest.TestCase -# test_global_max_pooling() diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_reduction.py b/examples/x2oneflow/paddle2oneflow/nodes/test_reduction.py deleted file mode 100644 index b47a826e32eb807ed694e5c133619b5ec2d8dae9..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/nodes/test_reduction.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -# def test_reduce_mean(): -# class Net(nn.Layer): -# def forward(self, x): -# return paddle.mean(x) - -# load_paddle_module_and_check(Net) - - -def test_reduce_mean_axis(): - class Net(nn.Layer): - def forward(self, x): - return paddle.mean(x, axis=1) - - load_paddle_module_and_check(Net) - - -def test_reduce_mean_axis_keepdim(): - class Net(nn.Layer): - def forward(self, x): - return paddle.mean(x, axis=3, keepdim=True) - - load_paddle_module_and_check(Net) diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_reshape.py b/examples/x2oneflow/paddle2oneflow/nodes/test_reshape.py deleted file mode 100644 index fbd36bb072fa8b2bf0f32f43746f30aed0feaf46..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/nodes/test_reshape.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -# TODO(daquexian): add tests for 0 and -1 after flow.reshape supports it -def test_reshape(): - class Net(nn.Layer): - def forward(self, x): - x = paddle.reshape(x, (5, 12)) - return x - - load_paddle_module_and_check(Net, (2, 5, 3, 2)) diff --git a/examples/x2oneflow/paddle2oneflow/nodes/test_softmax.py b/examples/x2oneflow/paddle2oneflow/nodes/test_softmax.py deleted file mode 100644 index 484e2eb5831b88c32d871a02cd8dd158c68dfc93..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/paddle2oneflow/nodes/test_softmax.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_paddle_module_and_check - - -def test_softmax(): - class Net(nn.Layer): - def forward(self, x): - m = nn.Softmax() - x = m(x) - return x - - load_paddle_module_and_check(Net) - - -def test_softmax_with_axis(): - class Net(nn.Layer): - def forward(self, x): - m = nn.Softmax(axis=1) - x = m(x) - return x - - load_paddle_module_and_check(Net) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_alexnet.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_alexnet.py deleted file mode 100644 index a02e57e618d5dabe99890a55cee9bfbc921db120..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_alexnet.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_alexnet(): - load_pytorch_module_and_check( - torchvision.models.alexnet, - input_size=(1, 3, 224, 224), - train_flag=False, - flow_weight_dir="/tmp/oneflow", - oneflow_code_gen_flag=True, - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_densenet.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_densenet.py deleted file mode 100644 index 7f2dad285bf10e62f35db900ab1c36b78a7cad2e..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_densenet.py +++ /dev/null @@ -1,133 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -"""DenseNet in PyTorch.""" -import math - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/densenet.py - - -class Bottleneck(nn.Module): - def __init__(self, in_planes, growth_rate): - super(Bottleneck, self).__init__() - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d(in_planes, 4 * growth_rate, kernel_size=1, bias=False) - self.bn2 = nn.BatchNorm2d(4 * growth_rate) - self.conv2 = nn.Conv2d( - 4 * growth_rate, growth_rate, kernel_size=3, padding=1, bias=False - ) - - def forward(self, x): - out = self.conv1(F.relu(self.bn1(x))) - out = self.conv2(F.relu(self.bn2(out))) - out = torch.cat([out, x], 1) - return out - - -class Transition(nn.Module): - def __init__(self, in_planes, out_planes): - super(Transition, self).__init__() - self.bn = nn.BatchNorm2d(in_planes) - self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False) - - def forward(self, x): - out = self.conv(F.relu(self.bn(x))) - out = F.avg_pool2d(out, 2) - return out - - -class DenseNet(nn.Module): - def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10): - super(DenseNet, self).__init__() - self.growth_rate = growth_rate - - num_planes = 2 * growth_rate - self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False) - - self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) - num_planes += nblocks[0] * growth_rate - out_planes = int(math.floor(num_planes * reduction)) - self.trans1 = Transition(num_planes, out_planes) - num_planes = out_planes - - self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) - num_planes += nblocks[1] * growth_rate - out_planes = int(math.floor(num_planes * reduction)) - self.trans2 = Transition(num_planes, out_planes) - num_planes = out_planes - - self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) - num_planes += nblocks[2] * growth_rate - out_planes = int(math.floor(num_planes * reduction)) - self.trans3 = Transition(num_planes, out_planes) - num_planes = out_planes - - self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) - num_planes += nblocks[3] * growth_rate - - self.bn = nn.BatchNorm2d(num_planes) - self.linear = nn.Linear(num_planes, num_classes) - - def _make_dense_layers(self, block, in_planes, nblock): - layers = [] - for i in range(nblock): - layers.append(block(in_planes, self.growth_rate)) - in_planes += self.growth_rate - return nn.Sequential(*layers) - - def forward(self, x): - out = self.conv1(x) - out = self.trans1(self.dense1(out)) - out = self.trans2(self.dense2(out)) - out = self.trans3(self.dense3(out)) - out = self.dense4(out) - out = F.avg_pool2d(F.relu(self.bn(out)), 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def DenseNet121(): - return DenseNet(Bottleneck, [6, 12, 24, 16], growth_rate=32) - - -def DenseNet169(): - return DenseNet(Bottleneck, [6, 12, 32, 32], growth_rate=32) - - -def DenseNet201(): - return DenseNet(Bottleneck, [6, 12, 48, 32], growth_rate=32) - - -def DenseNet161(): - return DenseNet(Bottleneck, [6, 12, 36, 24], growth_rate=48) - - -def densenet_cifar(): - return DenseNet(Bottleneck, [6, 12, 24, 16], growth_rate=12) - - -def test_densenet(): - load_pytorch_module_and_check( - densenet_cifar, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git 
a/examples/x2oneflow/pytorch2oneflow/code_gen/test_dlanet.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_dlanet.py deleted file mode 100644 index d1e08a4082ec62909c66e1bf0819204dee0360f9..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_dlanet.py +++ /dev/null @@ -1,146 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -'''DLA in PyTorch. -Reference: - Deep Layer Aggregation. https://arxiv.org/abs/1707.06484 -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, in_planes, planes, stride=1): - super(BasicBlock, self).__init__() - self.conv1 = nn.Conv2d( - in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, - stride=1, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*planes, - kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(self.expansion*planes) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.bn2(self.conv2(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class Root(nn.Module): - def __init__(self, in_channels, out_channels, kernel_size=1): - super(Root, self).__init__() - self.conv = nn.Conv2d( - in_channels, out_channels, kernel_size, - stride=1, padding=(kernel_size - 1) // 2, bias=False) - self.bn = nn.BatchNorm2d(out_channels) - - def forward(self, xs): - x = torch.cat(xs, 1) - out = F.relu(self.bn(self.conv(x))) - return out - - -class Tree(nn.Module): - def __init__(self, block, in_channels, out_channels, level=1, stride=1): - super(Tree, self).__init__() - self.level = level - if level == 1: - self.root = Root(2*out_channels, out_channels) - self.left_node = block(in_channels, out_channels, stride=stride) - self.right_node = block(out_channels, out_channels, stride=1) - else: - self.root = Root((level+2)*out_channels, out_channels) - for i in reversed(range(1, level)): - subtree = Tree(block, in_channels, out_channels, - level=i, stride=stride) - self.__setattr__('level_%d' % i, subtree) - self.prev_root = block(in_channels, out_channels, stride=stride) - self.left_node = block(out_channels, out_channels, stride=1) - self.right_node = block(out_channels, out_channels, stride=1) - - def forward(self, x): - xs = [self.prev_root(x)] if self.level > 1 else [] - for i in reversed(range(1, self.level)): - level_i = self.__getattr__('level_%d' % i) - x = level_i(x) - xs.append(x) - x = self.left_node(x) - xs.append(x) - x = self.right_node(x) - xs.append(x) - out = self.root(xs) - return out - - -class DLA(nn.Module): - def __init__(self, block=BasicBlock, 
num_classes=10): - super(DLA, self).__init__() - self.base = nn.Sequential( - nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False), - nn.BatchNorm2d(16), - nn.ReLU(True) - ) - - self.layer1 = nn.Sequential( - nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False), - nn.BatchNorm2d(16), - nn.ReLU(True) - ) - - self.layer2 = nn.Sequential( - nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False), - nn.BatchNorm2d(32), - nn.ReLU(True) - ) - - self.layer3 = Tree(block, 32, 64, level=1, stride=1) - self.layer4 = Tree(block, 64, 128, level=2, stride=2) - self.layer5 = Tree(block, 128, 256, level=2, stride=2) - self.layer6 = Tree(block, 256, 512, level=1, stride=2) - self.linear = nn.Linear(512, num_classes) - - def forward(self, x): - out = self.base(x) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = self.layer5(out) - out = self.layer6(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - -def test_dlanet(): - load_pytorch_module_and_check( - DLA, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_dpn.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_dpn.py deleted file mode 100644 index cbf09ce5c74f526ca62803e2691e0ebf2b6b0617..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_dpn.py +++ /dev/null @@ -1,113 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -'''Dual Path Networks in PyTorch.''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class Bottleneck(nn.Module): - def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer): - super(Bottleneck, self).__init__() - self.out_planes = out_planes - self.dense_depth = dense_depth - - self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False) - self.bn2 = nn.BatchNorm2d(in_planes) - self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(out_planes+dense_depth) - - self.shortcut = nn.Sequential() - if first_layer: - self.shortcut = nn.Sequential( - nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(out_planes+dense_depth) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - x = self.shortcut(x) - d = self.out_planes - out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1) - out = F.relu(out) - return out - - -class DPN(nn.Module): - def __init__(self, cfg): - super(DPN, self).__init__() - in_planes, out_planes = cfg['in_planes'], cfg['out_planes'] - num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth'] - - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.last_planes = 64 - self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1) - self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2) - self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2) - self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2) - self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10) - - def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for i,stride in enumerate(strides): - layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0)) - self.last_planes = out_planes + (i+2) * dense_depth - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def DPN26(): - cfg = { - 'in_planes': (96,192,384,768), - 'out_planes': (256,512,1024,2048), - 'num_blocks': (2,2,2,2), - 'dense_depth': (16,32,24,128) - } - return DPN(cfg) - -def DPN92(): - cfg = { - 'in_planes': (96,192,384,768), - 'out_planes': (256,512,1024,2048), - 'num_blocks': (3,4,20,3), - 'dense_depth': (16,32,24,128) - } - return DPN(cfg) - -def test_dpn(): - load_pytorch_module_and_check( - DPN26, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True, - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_efficientnet.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_efficientnet.py deleted file mode 100644 index 
afaea926915782e2077960a5f31c5b9bae16d74d..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_efficientnet.py +++ /dev/null @@ -1,187 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/efficientnet.py - - -def swish(x): - return x * x.sigmoid() - - -def drop_connect(x, drop_ratio): - keep_ratio = 1.0 - drop_ratio - mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device) - mask.bernoulli_(keep_ratio) - x.div_(keep_ratio) - x.mul_(mask) - return x - - -class SE(nn.Module): - """Squeeze-and-Excitation block with Swish.""" - - def __init__(self, in_channels, se_channels): - super(SE, self).__init__() - self.se1 = nn.Conv2d(in_channels, se_channels, kernel_size=1, bias=True) - self.se2 = nn.Conv2d(se_channels, in_channels, kernel_size=1, bias=True) - - def forward(self, x): - out = F.adaptive_avg_pool2d(x, (1, 1)) - out = swish(self.se1(out)) - out = self.se2(out).sigmoid() - out = x * out - return out - - -class Block(nn.Module): - """expansion + depthwise + pointwise + squeeze-excitation""" - - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride, - expand_ratio=1, - se_ratio=0.0, - drop_rate=0.0, - ): - super(Block, self).__init__() - self.stride = stride - self.drop_rate = drop_rate - self.expand_ratio = expand_ratio - - # Expansion - channels = expand_ratio * in_channels - self.conv1 = nn.Conv2d( - in_channels, channels, kernel_size=1, stride=1, padding=0, bias=False - ) - self.bn1 = nn.BatchNorm2d(channels) - - # Depthwise conv - self.conv2 = nn.Conv2d( - channels, - channels, - kernel_size=kernel_size, - stride=stride, - padding=(1 if kernel_size == 3 else 2), - groups=channels, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(channels) - - # SE layers - se_channels = int(in_channels * se_ratio) - self.se = SE(channels, se_channels) - - # Output - self.conv3 = nn.Conv2d( - channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False - ) - self.bn3 = nn.BatchNorm2d(out_channels) - - # Skip connection if in and out shapes are the same (MV-V2 style) - self.has_skip = (stride == 1) and (in_channels == out_channels) - - def forward(self, x): - out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x))) - out = swish(self.bn2(self.conv2(out))) - out = self.se(out) - out = self.bn3(self.conv3(out)) - if self.has_skip: - if self.training and self.drop_rate > 0: - out = drop_connect(out, self.drop_rate) - out = out + x - return out - - -class EfficientNet(nn.Module): - def __init__(self, cfg, num_classes=10): - super(EfficientNet, self).__init__() - self.cfg = cfg - self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(32) - self.layers = 
self._make_layers(in_channels=32) - self.linear = nn.Linear(cfg["out_channels"][-1], num_classes) - - def _make_layers(self, in_channels): - layers = [] - cfg = [ - self.cfg[k] - for k in [ - "expansion", - "out_channels", - "num_blocks", - "kernel_size", - "stride", - ] - ] - b = 0 - blocks = sum(self.cfg["num_blocks"]) - for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg): - strides = [stride] + [1] * (num_blocks - 1) - for stride in strides: - drop_rate = self.cfg["drop_connect_rate"] * b / blocks - layers.append( - Block( - in_channels, - out_channels, - kernel_size, - stride, - expansion, - se_ratio=0.25, - drop_rate=drop_rate, - ) - ) - in_channels = out_channels - return nn.Sequential(*layers) - - def forward(self, x): - out = swish(self.bn1(self.conv1(x))) - out = self.layers(out) - out = F.adaptive_avg_pool2d(out, 1) - out = out.view(out.size(0), -1) - dropout_rate = self.cfg["dropout_rate"] - if self.training and dropout_rate > 0: - out = F.dropout(out, p=dropout_rate) - out = self.linear(out) - return out - - -def EfficientNetB0(): - cfg = { - "num_blocks": [1, 2, 2, 3, 3, 4, 1], - "expansion": [1, 6, 6, 6, 6, 6, 6], - "out_channels": [16, 24, 40, 80, 112, 192, 320], - "kernel_size": [3, 3, 5, 3, 5, 5, 3], - "stride": [1, 2, 2, 2, 1, 2, 1], - "dropout_rate": 0.2, - "drop_connect_rate": 0.2, - } - return EfficientNet(cfg) - - -def test_efficientNetB0(): - load_pytorch_module_and_check( - EfficientNetB0, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_googlenet.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_googlenet.py deleted file mode 100644 index 6464f47d63209cc81626094259fe827adc6dee59..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_googlenet.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/googlenet.py - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class Inception(nn.Module): - def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): - super(Inception, self).__init__() - # 1x1 conv branch - self.b1 = nn.Sequential( - nn.Conv2d(in_planes, n1x1, kernel_size=1), - nn.BatchNorm2d(n1x1), - nn.ReLU(True), - ) - - # 1x1 conv -> 3x3 conv branch - self.b2 = nn.Sequential( - nn.Conv2d(in_planes, n3x3red, kernel_size=1), - nn.BatchNorm2d(n3x3red), - nn.ReLU(True), - nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), - nn.BatchNorm2d(n3x3), - nn.ReLU(True), - ) - - # 1x1 conv -> 5x5 conv branch - self.b3 = nn.Sequential( - nn.Conv2d(in_planes, n5x5red, kernel_size=1), - nn.BatchNorm2d(n5x5red), - nn.ReLU(True), - nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1), - nn.BatchNorm2d(n5x5), - nn.ReLU(True), - nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1), - nn.BatchNorm2d(n5x5), - nn.ReLU(True), - ) - - # 3x3 pool -> 1x1 conv branch - self.b4 = nn.Sequential( - nn.MaxPool2d(3, stride=1, padding=1), - nn.Conv2d(in_planes, pool_planes, kernel_size=1), - nn.BatchNorm2d(pool_planes), - nn.ReLU(True), - ) - - def forward(self, x): - y1 = self.b1(x) - y2 = self.b2(x) - y3 = self.b3(x) - y4 = self.b4(x) - return torch.cat([y1, y2, y3, y4], 1) - - -class GoogLeNet(nn.Module): - def __init__(self): - super(GoogLeNet, self).__init__() - self.pre_layers = nn.Sequential( - nn.Conv2d(3, 192, kernel_size=3, padding=1), - nn.BatchNorm2d(192), - nn.ReLU(True), - ) - - self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) - self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) - - self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) - - self.a4 = Inception(480, 192, 96, 208, 16, 48, 64) - self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) - self.c4 = Inception(512, 128, 128, 256, 24, 64, 64) - self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) - self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) - - self.a5 = Inception(832, 256, 160, 320, 32, 128, 128) - self.b5 = Inception(832, 384, 192, 384, 48, 128, 128) - - self.avgpool = nn.AvgPool2d(8, stride=1) - self.linear = nn.Linear(1024, 10) - - def forward(self, x): - out = self.pre_layers(x) - out = self.a3(out) - out = self.b3(out) - out = self.maxpool(out) - out = self.a4(out) - out = self.b4(out) - out = self.c4(out) - out = self.d4(out) - out = self.e4(out) - out = self.maxpool(out) - out = self.a5(out) - out = self.b5(out) - out = self.avgpool(out) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def test_googlenet(): - load_pytorch_module_and_check( - GoogLeNet, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_inception.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_inception.py deleted file mode 100644 index ceb6af4f8ebfaef10fafe22c44a13d883e28ff36..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_inception.py +++ /dev/null @@ -1,349 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/inceptionv3.py - - -class BasicConv2d(nn.Module): - def __init__(self, input_channels, output_channels, **kwargs): - super().__init__() - self.conv = nn.Conv2d(input_channels, output_channels, bias=False, **kwargs) - self.bn = nn.BatchNorm2d(output_channels) - self.relu = nn.ReLU(inplace=True) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - x = self.relu(x) - - return x - - -# same naive inception module -class InceptionA(nn.Module): - def __init__(self, input_channels, pool_features): - super().__init__() - self.branch1x1 = BasicConv2d(input_channels, 64, kernel_size=1) - - self.branch5x5 = nn.Sequential( - BasicConv2d(input_channels, 48, kernel_size=1), - BasicConv2d(48, 64, kernel_size=5, padding=2), - ) - - self.branch3x3 = nn.Sequential( - BasicConv2d(input_channels, 64, kernel_size=1), - BasicConv2d(64, 96, kernel_size=3, padding=1), - BasicConv2d(96, 96, kernel_size=3, padding=1), - ) - - self.branchpool = nn.Sequential( - nn.AvgPool2d(kernel_size=3, stride=1, padding=1), - BasicConv2d(input_channels, pool_features, kernel_size=3, padding=1), - ) - - def forward(self, x): - - # x -> 1x1(same) - branch1x1 = self.branch1x1(x) - - # x -> 1x1 -> 5x5(same) - branch5x5 = self.branch5x5(x) - # branch5x5 = self.branch5x5_2(branch5x5) - - # x -> 1x1 -> 3x3 -> 3x3(same) - branch3x3 = self.branch3x3(x) - - # x -> pool -> 1x1(same) - branchpool = self.branchpool(x) - - outputs = [branch1x1, branch5x5, branch3x3, branchpool] - - return torch.cat(outputs, 1) - - -# downsample -# Factorization into smaller convolutions -class InceptionB(nn.Module): - def __init__(self, input_channels): - super().__init__() - - self.branch3x3 = BasicConv2d(input_channels, 384, kernel_size=3, stride=2) - - self.branch3x3stack = nn.Sequential( - BasicConv2d(input_channels, 64, kernel_size=1), - BasicConv2d(64, 96, kernel_size=3, padding=1), - BasicConv2d(96, 96, kernel_size=3, stride=2), - ) - - self.branchpool = nn.MaxPool2d(kernel_size=3, stride=2) - - def forward(self, x): - - # x - > 3x3(downsample) - branch3x3 = self.branch3x3(x) - - # x -> 3x3 -> 3x3(downsample) - branch3x3stack = self.branch3x3stack(x) - - # x -> avgpool(downsample) - branchpool = self.branchpool(x) - - # """We can use two parallel stride 2 blocks: P and C. 
P is a pooling - # layer (either average or maximum pooling) the activation, both of - # them are stride 2 the filter banks of which are concatenated as in - # figure 10.""" - outputs = [branch3x3, branch3x3stack, branchpool] - - return torch.cat(outputs, 1) - - -# Factorizing Convolutions with Large Filter Size -class InceptionC(nn.Module): - def __init__(self, input_channels, channels_7x7): - super().__init__() - self.branch1x1 = BasicConv2d(input_channels, 192, kernel_size=1) - - c7 = channels_7x7 - - # In theory, we could go even further and argue that one can replace any n × n - # convolution by a 1 × n convolution followed by a n × 1 convolution and the - # computational cost saving increases dramatically as n grows (see figure 6). - self.branch7x7 = nn.Sequential( - BasicConv2d(input_channels, c7, kernel_size=1), - BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), - BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)), - ) - - self.branch7x7stack = nn.Sequential( - BasicConv2d(input_channels, c7, kernel_size=1), - BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), - BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)), - BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), - BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)), - ) - - self.branch_pool = nn.Sequential( - nn.AvgPool2d(kernel_size=3, stride=1, padding=1), - BasicConv2d(input_channels, 192, kernel_size=1), - ) - - def forward(self, x): - - # x -> 1x1(same) - branch1x1 = self.branch1x1(x) - - # x -> 1layer 1*7 and 7*1 (same) - branch7x7 = self.branch7x7(x) - - # x-> 2layer 1*7 and 7*1(same) - branch7x7stack = self.branch7x7stack(x) - - # x-> avgpool (same) - branchpool = self.branch_pool(x) - - outputs = [branch1x1, branch7x7, branch7x7stack, branchpool] - - return torch.cat(outputs, 1) - - -class InceptionD(nn.Module): - def __init__(self, input_channels): - super().__init__() - - self.branch3x3 = nn.Sequential( - BasicConv2d(input_channels, 192, kernel_size=1), - BasicConv2d(192, 320, kernel_size=3, stride=2), - ) - - self.branch7x7 = nn.Sequential( - BasicConv2d(input_channels, 192, kernel_size=1), - BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3)), - BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0)), - BasicConv2d(192, 192, kernel_size=3, stride=2), - ) - - self.branchpool = nn.AvgPool2d(kernel_size=3, stride=2) - - def forward(self, x): - - # x -> 1x1 -> 3x3(downsample) - branch3x3 = self.branch3x3(x) - - # x -> 1x1 -> 1x7 -> 7x1 -> 3x3 (downsample) - branch7x7 = self.branch7x7(x) - - # x -> avgpool (downsample) - branchpool = self.branchpool(x) - - outputs = [branch3x3, branch7x7, branchpool] - - return torch.cat(outputs, 1) - - -# same -class InceptionE(nn.Module): - def __init__(self, input_channels): - super().__init__() - self.branch1x1 = BasicConv2d(input_channels, 320, kernel_size=1) - - self.branch3x3_1 = BasicConv2d(input_channels, 384, kernel_size=1) - self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) - self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) - - self.branch3x3stack_1 = BasicConv2d(input_channels, 448, kernel_size=1) - self.branch3x3stack_2 = BasicConv2d(448, 384, kernel_size=3, padding=1) - self.branch3x3stack_3a = BasicConv2d( - 384, 384, kernel_size=(1, 3), padding=(0, 1) - ) - self.branch3x3stack_3b = BasicConv2d( - 384, 384, kernel_size=(3, 1), padding=(1, 0) - ) - - self.branch_pool = nn.Sequential( - nn.AvgPool2d(kernel_size=3, stride=1, padding=1), - BasicConv2d(input_channels, 192, 
kernel_size=1), - ) - - def forward(self, x): - - # x -> 1x1 (same) - branch1x1 = self.branch1x1(x) - - # x -> 1x1 -> 3x1 - # x -> 1x1 -> 1x3 - # concatenate(3x1, 1x3) - # """7. Inception modules with expanded the filter bank outputs. - # This architecture is used on the coarsest (8 × 8) grids to promote - # high dimensional representations, as suggested by principle - # 2 of Section 2.""" - branch3x3 = self.branch3x3_1(x) - branch3x3 = [self.branch3x3_2a(branch3x3), self.branch3x3_2b(branch3x3)] - branch3x3 = torch.cat(branch3x3, 1) - - # x -> 1x1 -> 3x3 -> 1x3 - # x -> 1x1 -> 3x3 -> 3x1 - # concatenate(1x3, 3x1) - branch3x3stack = self.branch3x3stack_1(x) - branch3x3stack = self.branch3x3stack_2(branch3x3stack) - branch3x3stack = [ - self.branch3x3stack_3a(branch3x3stack), - self.branch3x3stack_3b(branch3x3stack), - ] - branch3x3stack = torch.cat(branch3x3stack, 1) - - branchpool = self.branch_pool(x) - - outputs = [branch1x1, branch3x3, branch3x3stack, branchpool] - - return torch.cat(outputs, 1) - - -class InceptionV3(nn.Module): - def __init__(self, num_classes=100): - super().__init__() - self.Conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3, padding=1) - self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3, padding=1) - self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1) - self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1) - self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3) - - # naive inception module - self.Mixed_5b = InceptionA(192, pool_features=32) - self.Mixed_5c = InceptionA(256, pool_features=64) - self.Mixed_5d = InceptionA(288, pool_features=64) - - # downsample - self.Mixed_6a = InceptionB(288) - - self.Mixed_6b = InceptionC(768, channels_7x7=128) - self.Mixed_6c = InceptionC(768, channels_7x7=160) - self.Mixed_6d = InceptionC(768, channels_7x7=160) - self.Mixed_6e = InceptionC(768, channels_7x7=192) - - # downsample - self.Mixed_7a = InceptionD(768) - - self.Mixed_7b = InceptionE(1280) - self.Mixed_7c = InceptionE(2048) - - # 6*6 feature size - self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - self.dropout = nn.Dropout2d() - self.linear = nn.Linear(2048, num_classes) - - def forward(self, x): - - # 32 -> 30 - x = self.Conv2d_1a_3x3(x) - x = self.Conv2d_2a_3x3(x) - x = self.Conv2d_2b_3x3(x) - x = self.Conv2d_3b_1x1(x) - x = self.Conv2d_4a_3x3(x) - - # 30 -> 30 - x = self.Mixed_5b(x) - x = self.Mixed_5c(x) - x = self.Mixed_5d(x) - - # 30 -> 14 - # Efficient Grid Size Reduction to avoid representation - # bottleneck - x = self.Mixed_6a(x) - - # 14 -> 14 - # """In practice, we have found that employing this factorization does not - # work well on early layers, but it gives very good results on medium - # grid-sizes (On m × m feature maps, where m ranges between 12 and 20). 
- # On that level, very good results can be achieved by using 1 × 7 convolutions - # followed by 7 × 1 convolutions.""" - x = self.Mixed_6b(x) - x = self.Mixed_6c(x) - x = self.Mixed_6d(x) - x = self.Mixed_6e(x) - - # 14 -> 6 - # Efficient Grid Size Reduction - x = self.Mixed_7a(x) - - # 6 -> 6 - # We are using this solution only on the coarsest grid, - # since that is the place where producing high dimensional - # sparse representation is the most critical as the ratio of - # local processing (by 1 × 1 convolutions) is increased compared - # to the spatial aggregation.""" - x = self.Mixed_7b(x) - x = self.Mixed_7c(x) - - # 6 -> 1 - x = self.avgpool(x) - x = self.dropout(x) - x = x.view(x.size(0), -1) - x = self.linear(x) - return x - - -def test_inception_v3(): - load_pytorch_module_and_check( - InceptionV3, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_lenet.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_lenet.py deleted file mode 100644 index 32a4f51e0c11a66174a1a9e3ca6072e587983293..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_lenet.py +++ /dev/null @@ -1,50 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/lenet.py - -'''LeNet in PyTorch.''' -import torch.nn as nn -import torch.nn.functional as F - -class LeNet(nn.Module): - def __init__(self): - super(LeNet, self).__init__() - self.conv1 = nn.Conv2d(3, 6, 5) - self.conv2 = nn.Conv2d(6, 16, 5) - self.fc1 = nn.Linear(16*5*5, 120) - self.fc2 = nn.Linear(120, 84) - self.fc3 = nn.Linear(84, 10) - - def forward(self, x): - out = F.relu(self.conv1(x)) - out = F.max_pool2d(out, 2) - out = F.relu(self.conv2(out)) - out = F.max_pool2d(out, 2) - out = out.view(out.size(0), -1) - out = F.relu(self.fc1(out)) - out = F.relu(self.fc2(out)) - out = self.fc3(out) - return out - -def test_lenet(): - load_pytorch_module_and_check( - LeNet, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) - diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_mobilenet_v1.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_mobilenet_v1.py deleted file mode 100644 index f10e1dfee59efda73d4e1919b5db9a4721de3725..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_mobilenet_v1.py +++ /dev/null @@ -1,164 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/mobilenet.py - -import torch -import torch.nn as nn - - -class DepthSeperabelConv2d(nn.Module): - def __init__(self, input_channels, output_channels, kernel_size, **kwargs): - super().__init__() - self.depthwise = nn.Sequential( - nn.Conv2d( - input_channels, - input_channels, - kernel_size, - groups=input_channels, - **kwargs - ), - nn.BatchNorm2d(input_channels), - nn.ReLU(inplace=True), - ) - - self.pointwise = nn.Sequential( - nn.Conv2d(input_channels, output_channels, 1), - nn.BatchNorm2d(output_channels), - nn.ReLU(inplace=True), - ) - - def forward(self, x): - x = self.depthwise(x) - x = self.pointwise(x) - - return x - - -class BasicConv2d(nn.Module): - def __init__(self, input_channels, output_channels, kernel_size, **kwargs): - - super().__init__() - self.conv = nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs) - self.bn = nn.BatchNorm2d(output_channels) - self.relu = nn.ReLU(inplace=True) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - x = self.relu(x) - - return x - - -class MobileNetV1(nn.Module): - - """ - Args: - width multipler: The role of the width multiplier α is to thin - a network uniformly at each layer. For a given - layer and width multiplier α, the number of - input channels M becomes αM and the number of - output channels N becomes αN. 
- """ - - def __init__(self, width_multiplier=1, class_num=100): - super().__init__() - - alpha = width_multiplier - self.stem = nn.Sequential( - BasicConv2d(3, int(32 * alpha), 3, padding=1, bias=False), - DepthSeperabelConv2d( - int(32 * alpha), int(64 * alpha), 3, padding=1, bias=False - ), - ) - - # downsample - self.conv1 = nn.Sequential( - DepthSeperabelConv2d( - int(64 * alpha), int(128 * alpha), 3, stride=2, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(128 * alpha), int(128 * alpha), 3, padding=1, bias=False - ), - ) - - # downsample - self.conv2 = nn.Sequential( - DepthSeperabelConv2d( - int(128 * alpha), int(256 * alpha), 3, stride=2, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(256 * alpha), int(256 * alpha), 3, padding=1, bias=False - ), - ) - - # downsample - self.conv3 = nn.Sequential( - DepthSeperabelConv2d( - int(256 * alpha), int(512 * alpha), 3, stride=2, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False - ), - ) - - # downsample - self.conv4 = nn.Sequential( - DepthSeperabelConv2d( - int(512 * alpha), int(1024 * alpha), 3, stride=2, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(1024 * alpha), int(1024 * alpha), 3, padding=1, bias=False - ), - ) - - self.fc = nn.Linear(int(1024 * alpha), class_num) - self.avg = nn.AdaptiveAvgPool2d(1) - - def forward(self, x): - x = self.stem(x) - - x = self.conv1(x) - x = self.conv2(x) - x = self.conv3(x) - x = self.conv4(x) - - x = self.avg(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - return x - - -def test_mobilenet_v1(): - load_pytorch_module_and_check( - MobileNetV1, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True, - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_mobilenet_v2.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_mobilenet_v2.py deleted file mode 100644 index 818cad8a4bbd200c0e8923cf43dccc786e8869d9..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_mobilenet_v2.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_mobilenet_v2(): - load_pytorch_module_and_check( - torchvision.models.mobilenet_v2, - input_size=(1, 3, 224, 224), - input_min_val=0, - input_max_val=1, - train_flag=False, - flow_weight_dir="/tmp/oneflow", - oneflow_code_gen_flag=True, - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_mobilenet_v3.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_mobilenet_v3.py deleted file mode 100644 index 251dbbc8abefff3b21703647f4e3b20441e303d5..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_mobilenet_v3.py +++ /dev/null @@ -1,237 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.nn import init - -# https://github.com/xiaolai-sqlai/mobilenetv3/blob/master/mobilenetv3.py - - -class hswish(nn.Module): - def forward(self, x): - out = x * F.relu6(x + 3, inplace=True) / 6 - return out - - -class hsigmoid(nn.Module): - def forward(self, x): - out = F.relu6(x + 3, inplace=True) / 6 - return out - - -class SeModule(nn.Module): - def __init__(self, in_size, reduction=4): - super(SeModule, self).__init__() - self.se = nn.Sequential( - nn.AdaptiveAvgPool2d(1), - nn.Conv2d( - in_size, - in_size // reduction, - kernel_size=1, - stride=1, - padding=0, - bias=False, - ), - nn.BatchNorm2d(in_size // reduction), - nn.ReLU(inplace=True), - nn.Conv2d( - in_size // reduction, - in_size, - kernel_size=1, - stride=1, - padding=0, - bias=False, - ), - nn.BatchNorm2d(in_size), - hsigmoid(), - ) - - def forward(self, x): - return x * self.se(x) - - -class Block(nn.Module): - """expand + depthwise + pointwise""" - - def __init__( - self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride - ): - super(Block, self).__init__() - self.stride = stride - self.se = semodule - - self.conv1 = nn.Conv2d( - in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False - ) - self.bn1 = nn.BatchNorm2d(expand_size) - self.nolinear1 = nolinear - self.conv2 = nn.Conv2d( - expand_size, - expand_size, - kernel_size=kernel_size, - stride=stride, - padding=kernel_size // 2, - groups=expand_size, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(expand_size) - self.nolinear2 = nolinear - self.conv3 = nn.Conv2d( - expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False - ) - self.bn3 = nn.BatchNorm2d(out_size) - - self.shortcut = nn.Sequential() - if stride == 1 and in_size != out_size: - self.shortcut = nn.Sequential( - nn.Conv2d( - in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False - ), - nn.BatchNorm2d(out_size), - ) - - def forward(self, x): - out = self.nolinear1(self.bn1(self.conv1(x))) - out = self.nolinear2(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - if self.se != None: 
- out = self.se(out) - out = out + self.shortcut(x) if self.stride == 1 else out - return out - - -class MobileNetV3_Large(nn.Module): - def __init__(self, num_classes=1000): - super(MobileNetV3_Large, self).__init__() - self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(16) - self.hs1 = hswish() - - self.bneck = nn.Sequential( - Block(3, 16, 16, 16, nn.ReLU(inplace=True), None, 1), - Block(3, 16, 64, 24, nn.ReLU(inplace=True), None, 2), - Block(3, 24, 72, 24, nn.ReLU(inplace=True), None, 1), - Block(5, 24, 72, 40, nn.ReLU(inplace=True), SeModule(40), 2), - Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1), - Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1), - Block(3, 40, 240, 80, hswish(), None, 2), - Block(3, 80, 200, 80, hswish(), None, 1), - Block(3, 80, 184, 80, hswish(), None, 1), - Block(3, 80, 184, 80, hswish(), None, 1), - Block(3, 80, 480, 112, hswish(), SeModule(112), 1), - Block(3, 112, 672, 112, hswish(), SeModule(112), 1), - Block(5, 112, 672, 160, hswish(), SeModule(160), 1), - Block(5, 160, 672, 160, hswish(), SeModule(160), 2), - Block(5, 160, 960, 160, hswish(), SeModule(160), 1), - ) - - self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False) - self.bn2 = nn.BatchNorm2d(960) - self.hs2 = hswish() - self.linear3 = nn.Linear(960, 1280) - self.bn3 = nn.BatchNorm1d(1280) - self.hs3 = hswish() - self.linear4 = nn.Linear(1280, num_classes) - self.init_params() - - def init_params(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - init.kaiming_normal_(m.weight, mode="fan_out") - if m.bias is not None: - init.constant_(m.bias, 0) - elif isinstance(m, nn.BatchNorm2d): - init.constant_(m.weight, 1) - init.constant_(m.bias, 0) - elif isinstance(m, nn.Linear): - init.normal_(m.weight, std=0.001) - if m.bias is not None: - init.constant_(m.bias, 0) - - def forward(self, x): - out = self.hs1(self.bn1(self.conv1(x))) - out = self.bneck(out) - out = self.hs2(self.bn2(self.conv2(out))) - out = F.avg_pool2d(out, 7) - out = out.view(out.size(0), -1) - out = self.hs3(self.bn3(self.linear3(out))) - out = self.linear4(out) - return out - - -class MobileNetV3_Small(nn.Module): - def __init__(self, num_classes=1000): - super(MobileNetV3_Small, self).__init__() - self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(16) - self.hs1 = hswish() - - self.bneck = nn.Sequential( - Block(3, 16, 16, 16, nn.ReLU(inplace=True), SeModule(16), 2), - Block(3, 16, 72, 24, nn.ReLU(inplace=True), None, 2), - Block(3, 24, 88, 24, nn.ReLU(inplace=True), None, 1), - Block(5, 24, 96, 40, hswish(), SeModule(40), 2), - Block(5, 40, 240, 40, hswish(), SeModule(40), 1), - Block(5, 40, 240, 40, hswish(), SeModule(40), 1), - Block(5, 40, 120, 48, hswish(), SeModule(48), 1), - Block(5, 48, 144, 48, hswish(), SeModule(48), 1), - Block(5, 48, 288, 96, hswish(), SeModule(96), 2), - Block(5, 96, 576, 96, hswish(), SeModule(96), 1), - Block(5, 96, 576, 96, hswish(), SeModule(96), 1), - ) - - self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False) - self.bn2 = nn.BatchNorm2d(576) - self.hs2 = hswish() - self.linear3 = nn.Linear(576, 1280) - self.bn3 = nn.BatchNorm1d(1280) - self.hs3 = hswish() - self.linear4 = nn.Linear(1280, num_classes) - self.init_params() - - def init_params(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - init.kaiming_normal_(m.weight, mode="fan_out") - if m.bias is not None: - 
init.constant_(m.bias, 0) - elif isinstance(m, nn.BatchNorm2d): - init.constant_(m.weight, 1) - init.constant_(m.bias, 0) - elif isinstance(m, nn.Linear): - init.normal_(m.weight, std=0.001) - if m.bias is not None: - init.constant_(m.bias, 0) - - def forward(self, x): - out = self.hs1(self.bn1(self.conv1(x))) - out = self.bneck(out) - out = self.hs2(self.bn2(self.conv2(out))) - out = F.avg_pool2d(out, 7) - out = out.view(out.size(0), -1) - out = self.hs3(self.bn3(self.linear3(out))) - out = self.linear4(out) - return out - - -def test_MobileNetV3_Large(): - load_pytorch_module_and_check( - MobileNetV3_Large, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" , oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_pnasnet.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_pnasnet.py deleted file mode 100644 index 073751bf2b0a3f635689eb227a182d90da021656..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_pnasnet.py +++ /dev/null @@ -1,141 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/pnasnet.py - -'''PNASNet in PyTorch. 
-Paper: Progressive Neural Architecture Search -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class SepConv(nn.Module): - '''Separable Convolution.''' - def __init__(self, in_planes, out_planes, kernel_size, stride): - super(SepConv, self).__init__() - self.conv1 = nn.Conv2d(in_planes, out_planes, - kernel_size, stride, - padding=(kernel_size-1)//2, - bias=False, groups=in_planes) - self.bn1 = nn.BatchNorm2d(out_planes) - - def forward(self, x): - return self.bn1(self.conv1(x)) - - -class CellA(nn.Module): - def __init__(self, in_planes, out_planes, stride=1): - super(CellA, self).__init__() - self.stride = stride - self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) - if stride==2: - self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn1 = nn.BatchNorm2d(out_planes) - - def forward(self, x): - y1 = self.sep_conv1(x) - y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) - if self.stride==2: - y2 = self.bn1(self.conv1(y2)) - return F.relu(y1+y2) - -class CellB(nn.Module): - def __init__(self, in_planes, out_planes, stride=1): - super(CellB, self).__init__() - self.stride = stride - # Left branch - self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) - self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride) - # Right branch - self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride) - if stride==2: - self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn1 = nn.BatchNorm2d(out_planes) - # Reduce channels - self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn2 = nn.BatchNorm2d(out_planes) - - def forward(self, x): - # Left branch - y1 = self.sep_conv1(x) - y2 = self.sep_conv2(x) - # Right branch - y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) - if self.stride==2: - y3 = self.bn1(self.conv1(y3)) - y4 = self.sep_conv3(x) - # Concat & reduce channels - b1 = F.relu(y1+y2) - b2 = F.relu(y3+y4) - y = torch.cat([b1,b2], 1) - return F.relu(self.bn2(self.conv2(y))) - -class PNASNet(nn.Module): - def __init__(self, cell_type, num_cells, num_planes): - super(PNASNet, self).__init__() - self.in_planes = num_planes - self.cell_type = cell_type - - self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(num_planes) - - self.layer1 = self._make_layer(num_planes, num_cells=6) - self.layer2 = self._downsample(num_planes*2) - self.layer3 = self._make_layer(num_planes*2, num_cells=6) - self.layer4 = self._downsample(num_planes*4) - self.layer5 = self._make_layer(num_planes*4, num_cells=6) - - self.linear = nn.Linear(num_planes*4, 10) - - def _make_layer(self, planes, num_cells): - layers = [] - for _ in range(num_cells): - layers.append(self.cell_type(self.in_planes, planes, stride=1)) - self.in_planes = planes - return nn.Sequential(*layers) - - def _downsample(self, planes): - layer = self.cell_type(self.in_planes, planes, stride=2) - self.in_planes = planes - return layer - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = self.layer5(out) - out = F.avg_pool2d(out, 8) - out = self.linear(out.view(out.size(0), -1)) - return out - - -def PNASNetA(): - return PNASNet(CellA, num_cells=6, num_planes=44) - -def PNASNetB(): - 
return PNASNet(CellB, num_cells=6, num_planes=32) - -def test_pnasnet(): - load_pytorch_module_and_check( - PNASNetA, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_preact_resnet.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_preact_resnet.py deleted file mode 100644 index b124922dbcc2ac0ef931c36786e7db36f3a7af0d..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_preact_resnet.py +++ /dev/null @@ -1,133 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -'''Pre-activation ResNet in PyTorch. -Reference: -[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Identity Mappings in Deep Residual Networks. arXiv:1603.05027 -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class PreActBlock(nn.Module): - '''Pre-activation version of the BasicBlock.''' - expansion = 1 - - def __init__(self, in_planes, planes, stride=1): - super(PreActBlock, self).__init__() - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) - - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) - ) - - def forward(self, x): - out = F.relu(self.bn1(x)) - shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x - out = self.conv1(out) - out = self.conv2(F.relu(self.bn2(out))) - out += shortcut - return out - - -class PreActBottleneck(nn.Module): - '''Pre-activation version of the original Bottleneck module.''' - expansion = 4 - - def __init__(self, in_planes, planes, stride=1): - super(PreActBottleneck, self).__init__() - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn3 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) - - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) - ) - - def forward(self, x): - out = F.relu(self.bn1(x)) - shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x - out = self.conv1(out) - out = self.conv2(F.relu(self.bn2(out))) - out = self.conv3(F.relu(self.bn3(out))) - out += shortcut - return out - - -class PreActResNet(nn.Module): - def __init__(self, block, num_blocks, num_classes=10): - super(PreActResNet, self).__init__() - 
self.in_planes = 64 - - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) - self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) - self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) - self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) - self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) - self.linear = nn.Linear(512*block.expansion, num_classes) - - def _make_layer(self, block, planes, num_blocks, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for stride in strides: - layers.append(block(self.in_planes, planes, stride)) - self.in_planes = planes * block.expansion - return nn.Sequential(*layers) - - def forward(self, x): - out = self.conv1(x) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def PreActResNet18(): - return PreActResNet(PreActBlock, [2,2,2,2]) - -def PreActResNet34(): - return PreActResNet(PreActBlock, [3,4,6,3]) - -def PreActResNet50(): - return PreActResNet(PreActBottleneck, [3,4,6,3]) - -def PreActResNet101(): - return PreActResNet(PreActBottleneck, [3,4,23,3]) - -def PreActResNet152(): - return PreActResNet(PreActBottleneck, [3,8,36,3]) - -def test_preact_resnet(): - load_pytorch_module_and_check( - PreActResNet18, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_regnet.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_regnet.py deleted file mode 100644 index 9bc39906491823bd645fab731910bdbfc3913c26..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_regnet.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/regnet.py - - -class SE(nn.Module): - """Squeeze-and-Excitation block.""" - - def __init__(self, in_planes, se_planes): - super(SE, self).__init__() - self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True) - self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True) - - def forward(self, x): - out = F.adaptive_avg_pool2d(x, (1, 1)) - out = F.relu(self.se1(out)) - out = self.se2(out).sigmoid() - out = x * out - return out - - -class Block(nn.Module): - def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio): - super(Block, self).__init__() - # 1x1 - w_b = int(round(w_out * bottleneck_ratio)) - self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(w_b) - # 3x3 - num_groups = w_b // group_width - self.conv2 = nn.Conv2d( - w_b, - w_b, - kernel_size=3, - stride=stride, - padding=1, - groups=num_groups, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(w_b) - # se - self.with_se = se_ratio > 0 - if self.with_se: - w_se = int(round(w_in * se_ratio)) - self.se = SE(w_b, w_se) - # 1x1 - self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(w_out) - - self.shortcut = nn.Sequential() - if stride != 1 or w_in != w_out: - self.shortcut = nn.Sequential( - nn.Conv2d(w_in, w_out, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(w_out), - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - if self.with_se: - out = self.se(out) - out = self.bn3(self.conv3(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class RegNet(nn.Module): - def __init__(self, cfg, num_classes=10): - super(RegNet, self).__init__() - self.cfg = cfg - self.in_planes = 64 - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.layer1 = self._make_layer(0) - self.layer2 = self._make_layer(1) - self.layer3 = self._make_layer(2) - self.layer4 = self._make_layer(3) - self.linear = nn.Linear(self.cfg["widths"][-1], num_classes) - - def _make_layer(self, idx): - depth = self.cfg["depths"][idx] - width = self.cfg["widths"][idx] - stride = self.cfg["strides"][idx] - group_width = self.cfg["group_width"] - bottleneck_ratio = self.cfg["bottleneck_ratio"] - se_ratio = self.cfg["se_ratio"] - - layers = [] - for i in range(depth): - s = stride if i == 0 else 1 - layers.append( - Block(self.in_planes, width, s, group_width, bottleneck_ratio, se_ratio) - ) - self.in_planes = width - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = F.adaptive_avg_pool2d(out, (1, 1)) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def RegNetX_200MF(): - cfg = { - "depths": [1, 1, 4, 7], - "widths": [24, 56, 152, 368], - "strides": [1, 1, 2, 2], - "group_width": 8, - "bottleneck_ratio": 1, - "se_ratio": 0, - } - return RegNet(cfg) - - -def RegNetX_400MF(): - cfg = { - "depths": [1, 2, 7, 12], - "widths": [32, 64, 160, 384], - "strides": [1, 1, 2, 2], - "group_width": 16, - "bottleneck_ratio": 1, - "se_ratio": 0, - } - return RegNet(cfg) - - -def RegNetY_400MF(): - cfg = { - "depths": [1, 2, 7, 
12], - "widths": [32, 64, 160, 384], - "strides": [1, 1, 2, 2], - "group_width": 16, - "bottleneck_ratio": 1, - "se_ratio": 0.25, - } - return RegNet(cfg) - - -def test_regnet(): - load_pytorch_module_and_check( - RegNetX_200MF, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_resnet18.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_resnet18.py deleted file mode 100644 index 006589823c9d9b7254740c1bd833d7fb6e3b42e4..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_resnet18.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_resnet18(): - load_pytorch_module_and_check( - torchvision.models.resnet18, - input_size=(1, 3, 224, 224), - train_flag=False, - flow_weight_dir="/tmp/oneflow", - oneflow_code_gen_flag=True - ) - diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_resnext.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_resnext.py deleted file mode 100644 index 3efe057c922b64369d1095b8d8be3a2ed756c186..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_resnext.py +++ /dev/null @@ -1,132 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnext.py - - -class Block(nn.Module): - """Grouped convolution block.""" - - expansion = 2 - - def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1): - super(Block, self).__init__() - group_width = cardinality * bottleneck_width - self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(group_width) - self.conv2 = nn.Conv2d( - group_width, - group_width, - kernel_size=3, - stride=stride, - padding=1, - groups=cardinality, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(group_width) - self.conv3 = nn.Conv2d( - group_width, self.expansion * group_width, kernel_size=1, bias=False - ) - self.bn3 = nn.BatchNorm2d(self.expansion * group_width) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion * group_width: - self.shortcut = nn.Sequential( - nn.Conv2d( - in_planes, - self.expansion * group_width, - kernel_size=1, - stride=stride, - bias=False, - ), - nn.BatchNorm2d(self.expansion * group_width), - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class ResNeXt(nn.Module): - def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10): - super(ResNeXt, self).__init__() - self.cardinality = cardinality - self.bottleneck_width = bottleneck_width - self.in_planes = 64 - - self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.layer1 = self._make_layer(num_blocks[0], 1) - self.layer2 = self._make_layer(num_blocks[1], 2) - self.layer3 = self._make_layer(num_blocks[2], 2) - # self.layer4 = self._make_layer(num_blocks[3], 2) - self.linear = nn.Linear(cardinality * bottleneck_width * 8, num_classes) - - def _make_layer(self, num_blocks, stride): - strides = [stride] + [1] * (num_blocks - 1) - layers = [] - for stride in strides: - layers.append( - Block(self.in_planes, self.cardinality, self.bottleneck_width, stride) - ) - self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width - # Increase bottleneck_width by 2 after each stage. 
- self.bottleneck_width *= 2 - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - # out = self.layer4(out) - out = F.avg_pool2d(out, 8) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def ResNeXt29_2x64d(): - return ResNeXt(num_blocks=[3, 3, 3], cardinality=2, bottleneck_width=64) - - -def ResNeXt29_4x64d(): - return ResNeXt(num_blocks=[3, 3, 3], cardinality=4, bottleneck_width=64) - - -def ResNeXt29_8x64d(): - return ResNeXt(num_blocks=[3, 3, 3], cardinality=8, bottleneck_width=64) - - -def ResNeXt29_32x4d(): - return ResNeXt(num_blocks=[3, 3, 3], cardinality=32, bottleneck_width=4) - - -def test_resnext(): - load_pytorch_module_and_check( - ResNeXt29_2x64d, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_senet.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_senet.py deleted file mode 100644 index 39eff39cc092aa641edc06a2c72a71040a8524e3..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_senet.py +++ /dev/null @@ -1,197 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/senet.py - - -class BasicResidualSEBlock(nn.Module): - - expansion = 1 - - def __init__(self, in_channels, out_channels, stride, r=16): - super().__init__() - - self.residual = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1), - nn.BatchNorm2d(out_channels), - nn.ReLU(inplace=True), - nn.Conv2d(out_channels, out_channels * self.expansion, 3, padding=1), - nn.BatchNorm2d(out_channels * self.expansion), - nn.ReLU(inplace=True), - ) - - self.shortcut = nn.Sequential() - if stride != 1 or in_channels != out_channels * self.expansion: - self.shortcut = nn.Sequential( - nn.Conv2d(in_channels, out_channels * self.expansion, 1, stride=stride), - nn.BatchNorm2d(out_channels * self.expansion), - ) - - self.squeeze = nn.AdaptiveAvgPool2d(1) - self.excitation = nn.Sequential( - nn.Linear( - out_channels * self.expansion, out_channels * self.expansion // r - ), - nn.ReLU(inplace=True), - nn.Linear( - out_channels * self.expansion // r, out_channels * self.expansion - ), - nn.Sigmoid(), - ) - - def forward(self, x): - shortcut = self.shortcut(x) - residual = self.residual(x) - - squeeze = self.squeeze(residual) - squeeze = squeeze.view(squeeze.size(0), -1) - excitation = self.excitation(squeeze) - excitation = excitation.view(residual.size(0), residual.size(1), 1, 1) - - x = residual * excitation.expand_as(residual) + shortcut - - return F.relu(x) - - -class BottleneckResidualSEBlock(nn.Module): - - expansion = 4 - - def __init__(self, in_channels, out_channels, stride, r=16): - super().__init__() - - self.residual = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1), - nn.BatchNorm2d(out_channels), - nn.ReLU(inplace=True), - nn.Conv2d(out_channels, out_channels, 3, stride=stride, padding=1), - nn.BatchNorm2d(out_channels), - nn.ReLU(inplace=True), - nn.Conv2d(out_channels, out_channels * self.expansion, 1), - nn.BatchNorm2d(out_channels * self.expansion), - nn.ReLU(inplace=True), - ) - - self.squeeze = nn.AdaptiveAvgPool2d(1) - self.excitation = nn.Sequential( - nn.Linear( - out_channels * self.expansion, out_channels * self.expansion // r - ), - nn.ReLU(inplace=True), - nn.Linear( - out_channels * self.expansion // r, out_channels * self.expansion - ), - nn.Sigmoid(), - ) - - self.shortcut = nn.Sequential() - if stride != 1 or in_channels != out_channels * self.expansion: - self.shortcut = nn.Sequential( - nn.Conv2d(in_channels, out_channels * self.expansion, 1, stride=stride), - nn.BatchNorm2d(out_channels * self.expansion), - ) - - def forward(self, x): - - shortcut = self.shortcut(x) - - residual = self.residual(x) - squeeze = self.squeeze(residual) - squeeze = squeeze.view(squeeze.size(0), -1) - excitation = self.excitation(squeeze) - excitation = excitation.view(residual.size(0), residual.size(1), 1, 1) - - x = residual * excitation.expand_as(residual) + shortcut - - return F.relu(x) - - -class SEResNet(nn.Module): - def __init__(self, block, block_num, class_num=100): - super().__init__() - - self.in_channels = 64 - - self.pre = nn.Sequential( - nn.Conv2d(3, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True) - ) - - self.stage1 = self._make_stage(block, block_num[0], 64, 1) - self.stage2 = self._make_stage(block, block_num[1], 128, 2) - self.stage3 = self._make_stage(block, block_num[2], 256, 2) - self.stage4 
= self._make_stage(block, block_num[3], 512, 2) - - self.linear = nn.Linear(self.in_channels, class_num) - - def forward(self, x): - x = self.pre(x) - - x = self.stage1(x) - x = self.stage2(x) - x = self.stage3(x) - x = self.stage4(x) - - x = F.adaptive_avg_pool2d(x, 1) - x = x.view(x.size(0), -1) - - x = self.linear(x) - - return x - - def _make_stage(self, block, num, out_channels, stride): - - layers = [] - layers.append(block(self.in_channels, out_channels, stride)) - self.in_channels = out_channels * block.expansion - - while num - 1: - layers.append(block(self.in_channels, out_channels, 1)) - num -= 1 - - return nn.Sequential(*layers) - - -def seresnet18(): - return SEResNet(BasicResidualSEBlock, [2, 2, 2, 2]) - - -def seresnet34(): - return SEResNet(BasicResidualSEBlock, [3, 4, 6, 3]) - - -def seresnet50(): - return SEResNet(BottleneckResidualSEBlock, [3, 4, 6, 3]) - - -def seresnet101(): - return SEResNet(BottleneckResidualSEBlock, [3, 4, 23, 3]) - - -def seresnet152(): - return SEResNet(BottleneckResidualSEBlock, [3, 8, 36, 3]) - - -def test_senet(): - load_pytorch_module_and_check( - seresnet18, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_shufflenet_v1.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_shufflenet_v1.py deleted file mode 100644 index d5bc72f751b21aaa6584ab8bc422af813d33fb2b..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_shufflenet_v1.py +++ /dev/null @@ -1,136 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class ShuffleBlock(nn.Module): - def __init__(self, groups): - super(ShuffleBlock, self).__init__() - self.groups = groups - - def forward(self, x): - """Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]""" - N, C, H, W = x.size() - g = self.groups - return x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W) - - -class Bottleneck(nn.Module): - def __init__(self, in_planes, out_planes, stride, groups): - super(Bottleneck, self).__init__() - self.stride = stride - - mid_planes = out_planes // 4 - g = 1 if in_planes == 24 else groups - - self.conv1 = nn.Conv2d( - in_planes, mid_planes, kernel_size=1, groups=g, bias=False - ) - self.bn1 = nn.BatchNorm2d(mid_planes) - self.shuffle1 = ShuffleBlock(groups=g) - self.conv2 = nn.Conv2d( - mid_planes, - mid_planes, - kernel_size=3, - stride=stride, - padding=1, - groups=mid_planes, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(mid_planes) - self.conv3 = nn.Conv2d( - mid_planes, out_planes, kernel_size=1, groups=groups, bias=False - ) - self.bn3 = nn.BatchNorm2d(out_planes) - - self.shortcut = nn.Sequential() - if stride == 2: - self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1)) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.shuffle1(out) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - res = self.shortcut(x) - out = ( - F.relu(torch.cat([out, res], 1)) if self.stride == 2 else F.relu(out + res) - ) - return out - - -class ShuffleNet(nn.Module): - def __init__(self, cfg): - super(ShuffleNet, self).__init__() - out_planes = cfg["out_planes"] - num_blocks = cfg["num_blocks"] - groups = cfg["groups"] - - self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(24) - self.in_planes = 24 - self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups) - self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups) - self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups) - self.linear = nn.Linear(out_planes[2], 10) - - def _make_layer(self, out_planes, num_blocks, groups): - layers = [] - for i in range(num_blocks): - stride = 2 if i == 0 else 1 - cat_planes = self.in_planes if i == 0 else 0 - layers.append( - Bottleneck( - self.in_planes, - out_planes - cat_planes, - stride=stride, - groups=groups, - ) - ) - self.in_planes = out_planes - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def ShuffleNetG2(): - cfg = {"out_planes": [200, 400, 800], "num_blocks": [4, 8, 4], "groups": 2} - return ShuffleNet(cfg) - - -def ShuffleNetG3(): - cfg = {"out_planes": [240, 480, 960], "num_blocks": [4, 8, 4], "groups": 3} - return ShuffleNet(cfg) - - -def test_shufflenet_v1_g2(): - load_pytorch_module_and_check( - ShuffleNetG2, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_shufflenet_v2.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_shufflenet_v2.py deleted file mode 100644 index 2548fde8370acc405954145f9968748ae4f540a2..0000000000000000000000000000000000000000 --- 
a/examples/x2oneflow/pytorch2oneflow/code_gen/test_shufflenet_v2.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/shufflenetv2.py - - -class ShuffleBlock(nn.Module): - def __init__(self, groups=2): - super(ShuffleBlock, self).__init__() - self.groups = groups - - def forward(self, x): - """Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]""" - N, C, H, W = x.size() - g = self.groups - return x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W) - - -class SplitBlock(nn.Module): - def __init__(self, ratio): - super(SplitBlock, self).__init__() - self.ratio = ratio - - def forward(self, x): - c = int(x.size(1) * self.ratio) - return x[:, :c, :, :], x[:, c:, :, :] - - -class BasicBlock(nn.Module): - def __init__(self, in_channels, split_ratio=0.5): - super(BasicBlock, self).__init__() - self.split = SplitBlock(split_ratio) - in_channels = int(in_channels * split_ratio) - self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(in_channels) - self.conv2 = nn.Conv2d( - in_channels, - in_channels, - kernel_size=3, - stride=1, - padding=1, - groups=in_channels, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(in_channels) - self.conv3 = nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(in_channels) - self.shuffle = ShuffleBlock() - - def forward(self, x): - x1, x2 = self.split(x) - out = F.relu(self.bn1(self.conv1(x2))) - out = self.bn2(self.conv2(out)) - out = F.relu(self.bn3(self.conv3(out))) - out = torch.cat([x1, out], 1) - out = self.shuffle(out) - return out - - -class DownBlock(nn.Module): - def __init__(self, in_channels, out_channels): - super(DownBlock, self).__init__() - mid_channels = out_channels // 2 - # left - self.conv1 = nn.Conv2d( - in_channels, - in_channels, - kernel_size=3, - stride=2, - padding=1, - groups=in_channels, - bias=False, - ) - self.bn1 = nn.BatchNorm2d(in_channels) - self.conv2 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False) - self.bn2 = nn.BatchNorm2d(mid_channels) - # right - self.conv3 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(mid_channels) - self.conv4 = nn.Conv2d( - mid_channels, - mid_channels, - kernel_size=3, - stride=2, - padding=1, - groups=mid_channels, - bias=False, - ) - self.bn4 = nn.BatchNorm2d(mid_channels) - self.conv5 = nn.Conv2d(mid_channels, mid_channels, kernel_size=1, bias=False) - self.bn5 = nn.BatchNorm2d(mid_channels) - - self.shuffle = ShuffleBlock() - - def forward(self, x): - # left - out1 = self.bn1(self.conv1(x)) - out1 = F.relu(self.bn2(self.conv2(out1))) - # right - out2 = F.relu(self.bn3(self.conv3(x))) - out2 = 
self.bn4(self.conv4(out2)) - out2 = F.relu(self.bn5(self.conv5(out2))) - # concat - out = torch.cat([out1, out2], 1) - out = self.shuffle(out) - return out - - -class ShuffleNetV2(nn.Module): - def __init__(self, net_size=0.5): - super(ShuffleNetV2, self).__init__() - out_channels = configs[net_size]["out_channels"] - num_blocks = configs[net_size]["num_blocks"] - - self.conv1 = nn.Conv2d(3, 24, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(24) - self.in_channels = 24 - self.layer1 = self._make_layer(out_channels[0], num_blocks[0]) - self.layer2 = self._make_layer(out_channels[1], num_blocks[1]) - self.layer3 = self._make_layer(out_channels[2], num_blocks[2]) - self.conv2 = nn.Conv2d( - out_channels[2], - out_channels[3], - kernel_size=1, - stride=1, - padding=0, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(out_channels[3]) - self.linear = nn.Linear(out_channels[3], 10) - - def _make_layer(self, out_channels, num_blocks): - layers = [DownBlock(self.in_channels, out_channels)] - for i in range(num_blocks): - layers.append(BasicBlock(out_channels)) - self.in_channels = out_channels - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - # out = F.max_pool2d(out, 3, stride=2, padding=1) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = F.relu(self.bn2(self.conv2(out))) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -configs = { - 0.5: {"out_channels": (48, 96, 192, 1024), "num_blocks": (3, 7, 3)}, - 1: {"out_channels": (116, 232, 464, 1024), "num_blocks": (3, 7, 3)}, - 1.5: {"out_channels": (176, 352, 704, 1024), "num_blocks": (3, 7, 3)}, - 2: {"out_channels": (224, 488, 976, 2048), "num_blocks": (3, 7, 3)}, -} - - -def test_shufflenet_v2(): - load_pytorch_module_and_check( - ShuffleNetV2, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_squeezenet.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_squeezenet.py deleted file mode 100644 index af1bbe08d9c719e73b8d20bb73e1b2b9d4fc5f44..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_squeezenet.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_squeezenet(): - load_pytorch_module_and_check( - torchvision.models.SqueezeNet, - input_size=(1, 3, 224, 224), - train_flag=False, - flow_weight_dir="/tmp/oneflow", - oneflow_code_gen_flag=True, - ) diff --git a/examples/x2oneflow/pytorch2oneflow/code_gen/test_vgg16.py b/examples/x2oneflow/pytorch2oneflow/code_gen/test_vgg16.py deleted file mode 100644 index b64f230c09addbf93e5f8aadf09dbeb16e748649..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/code_gen/test_vgg16.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_vgg16(): - load_pytorch_module_and_check( - torchvision.models.vgg16, - input_size=(1, 3, 224, 224), - train_flag=False, - flow_weight_dir="/tmp/oneflow", - oneflow_code_gen_flag=True - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_alexnet.py b/examples/x2oneflow/pytorch2oneflow/models/test_alexnet.py deleted file mode 100644 index 3f6d476e21e74bb21d2cb95ef2c603297d5aac7f..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_alexnet.py +++ /dev/null @@ -1,27 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_alexnet(): - load_pytorch_module_and_check( - torchvision.models.alexnet, - input_size=(1, 3, 224, 224), - train_flag=False, - flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_densenet.py b/examples/x2oneflow/pytorch2oneflow/models/test_densenet.py deleted file mode 100644 index 93689196824e7f983a7d5a88c63a264e25bde98b..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_densenet.py +++ /dev/null @@ -1,133 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -"""DenseNet in PyTorch.""" -import math - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/densenet.py - - -class Bottleneck(nn.Module): - def __init__(self, in_planes, growth_rate): - super(Bottleneck, self).__init__() - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d(in_planes, 4 * growth_rate, kernel_size=1, bias=False) - self.bn2 = nn.BatchNorm2d(4 * growth_rate) - self.conv2 = nn.Conv2d( - 4 * growth_rate, growth_rate, kernel_size=3, padding=1, bias=False - ) - - def forward(self, x): - out = self.conv1(F.relu(self.bn1(x))) - out = self.conv2(F.relu(self.bn2(out))) - out = torch.cat([out, x], 1) - return out - - -class Transition(nn.Module): - def __init__(self, in_planes, out_planes): - super(Transition, self).__init__() - self.bn = nn.BatchNorm2d(in_planes) - self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False) - - def forward(self, x): - out = self.conv(F.relu(self.bn(x))) - out = F.avg_pool2d(out, 2) - return out - - -class DenseNet(nn.Module): - def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10): - super(DenseNet, self).__init__() - self.growth_rate = growth_rate - - num_planes = 2 * growth_rate - self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False) - - self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) - num_planes += nblocks[0] * growth_rate - out_planes = int(math.floor(num_planes * reduction)) - self.trans1 = Transition(num_planes, out_planes) - num_planes = out_planes - - self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) - num_planes += nblocks[1] * growth_rate - out_planes = int(math.floor(num_planes * reduction)) - self.trans2 = Transition(num_planes, out_planes) - num_planes = out_planes - - self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) - num_planes += nblocks[2] * growth_rate - out_planes = int(math.floor(num_planes * reduction)) - self.trans3 = Transition(num_planes, out_planes) - num_planes = out_planes - - self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) - num_planes += nblocks[3] * growth_rate - - self.bn = nn.BatchNorm2d(num_planes) - self.linear = nn.Linear(num_planes, num_classes) - - def _make_dense_layers(self, block, in_planes, nblock): - layers = [] - for i in range(nblock): - layers.append(block(in_planes, self.growth_rate)) - in_planes += self.growth_rate - return nn.Sequential(*layers) - - def forward(self, x): - out = self.conv1(x) - out = self.trans1(self.dense1(out)) - out = self.trans2(self.dense2(out)) - out = self.trans3(self.dense3(out)) - out = self.dense4(out) - out = F.avg_pool2d(F.relu(self.bn(out)), 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def DenseNet121(): - return DenseNet(Bottleneck, [6, 12, 24, 16], growth_rate=32) - - -def DenseNet169(): - return DenseNet(Bottleneck, [6, 12, 32, 32], growth_rate=32) - - -def DenseNet201(): - return DenseNet(Bottleneck, [6, 12, 
48, 32], growth_rate=32) - - -def DenseNet161(): - return DenseNet(Bottleneck, [6, 12, 36, 24], growth_rate=48) - - -def densenet_cifar(): - return DenseNet(Bottleneck, [6, 12, 24, 16], growth_rate=12) - - -def test_densenet(): - load_pytorch_module_and_check( - densenet_cifar, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_dlanet.py b/examples/x2oneflow/pytorch2oneflow/models/test_dlanet.py deleted file mode 100644 index ceae77c7c4716378e2e2a169419652501261bb71..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_dlanet.py +++ /dev/null @@ -1,146 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -'''DLA in PyTorch. -Reference: - Deep Layer Aggregation. https://arxiv.org/abs/1707.06484 -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, in_planes, planes, stride=1): - super(BasicBlock, self).__init__() - self.conv1 = nn.Conv2d( - in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, - stride=1, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*planes, - kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(self.expansion*planes) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.bn2(self.conv2(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class Root(nn.Module): - def __init__(self, in_channels, out_channels, kernel_size=1): - super(Root, self).__init__() - self.conv = nn.Conv2d( - in_channels, out_channels, kernel_size, - stride=1, padding=(kernel_size - 1) // 2, bias=False) - self.bn = nn.BatchNorm2d(out_channels) - - def forward(self, xs): - x = torch.cat(xs, 1) - out = F.relu(self.bn(self.conv(x))) - return out - - -class Tree(nn.Module): - def __init__(self, block, in_channels, out_channels, level=1, stride=1): - super(Tree, self).__init__() - self.level = level - if level == 1: - self.root = Root(2*out_channels, out_channels) - self.left_node = block(in_channels, out_channels, stride=stride) - self.right_node = block(out_channels, out_channels, stride=1) - else: - self.root = Root((level+2)*out_channels, out_channels) - for i in reversed(range(1, level)): - subtree = Tree(block, in_channels, out_channels, - level=i, stride=stride) - self.__setattr__('level_%d' % i, subtree) - self.prev_root = block(in_channels, out_channels, stride=stride) - self.left_node = block(out_channels, out_channels, stride=1) - self.right_node = block(out_channels, out_channels, stride=1) - - def 
forward(self, x): - xs = [self.prev_root(x)] if self.level > 1 else [] - for i in reversed(range(1, self.level)): - level_i = self.__getattr__('level_%d' % i) - x = level_i(x) - xs.append(x) - x = self.left_node(x) - xs.append(x) - x = self.right_node(x) - xs.append(x) - out = self.root(xs) - return out - - -class DLA(nn.Module): - def __init__(self, block=BasicBlock, num_classes=10): - super(DLA, self).__init__() - self.base = nn.Sequential( - nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False), - nn.BatchNorm2d(16), - nn.ReLU(True) - ) - - self.layer1 = nn.Sequential( - nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False), - nn.BatchNorm2d(16), - nn.ReLU(True) - ) - - self.layer2 = nn.Sequential( - nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False), - nn.BatchNorm2d(32), - nn.ReLU(True) - ) - - self.layer3 = Tree(block, 32, 64, level=1, stride=1) - self.layer4 = Tree(block, 64, 128, level=2, stride=2) - self.layer5 = Tree(block, 128, 256, level=2, stride=2) - self.layer6 = Tree(block, 256, 512, level=1, stride=2) - self.linear = nn.Linear(512, num_classes) - - def forward(self, x): - out = self.base(x) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = self.layer5(out) - out = self.layer6(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - -def test_dlanet(): - load_pytorch_module_and_check( - DLA, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_dpn.py b/examples/x2oneflow/pytorch2oneflow/models/test_dpn.py deleted file mode 100644 index 943f458f996b54adb26777c31c381e4af4d19289..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_dpn.py +++ /dev/null @@ -1,113 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -'''Dual Path Networks in PyTorch.''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class Bottleneck(nn.Module): - def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer): - super(Bottleneck, self).__init__() - self.out_planes = out_planes - self.dense_depth = dense_depth - - self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False) - self.bn2 = nn.BatchNorm2d(in_planes) - self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(out_planes+dense_depth) - - self.shortcut = nn.Sequential() - if first_layer: - self.shortcut = nn.Sequential( - nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(out_planes+dense_depth) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - x = self.shortcut(x) - d = self.out_planes - out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1) - out = F.relu(out) - return out - - -class DPN(nn.Module): - def __init__(self, cfg): - super(DPN, self).__init__() - in_planes, out_planes = cfg['in_planes'], cfg['out_planes'] - num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth'] - - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.last_planes = 64 - self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1) - self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2) - self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2) - self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2) - self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10) - - def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for i,stride in enumerate(strides): - layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0)) - self.last_planes = out_planes + (i+2) * dense_depth - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def DPN26(): - cfg = { - 'in_planes': (96,192,384,768), - 'out_planes': (256,512,1024,2048), - 'num_blocks': (2,2,2,2), - 'dense_depth': (16,32,24,128) - } - return DPN(cfg) - -def DPN92(): - cfg = { - 'in_planes': (96,192,384,768), - 'out_planes': (256,512,1024,2048), - 'num_blocks': (3,4,20,3), - 'dense_depth': (16,32,24,128) - } - return DPN(cfg) - -def test_dpn(): - load_pytorch_module_and_check( - DPN26, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_efficientnet.py b/examples/x2oneflow/pytorch2oneflow/models/test_efficientnet.py deleted file mode 100644 index 
427a8425ffb677e7a9b93eee053fc01030267741..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_efficientnet.py +++ /dev/null @@ -1,187 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/efficientnet.py - - -def swish(x): - return x * x.sigmoid() - - -def drop_connect(x, drop_ratio): - keep_ratio = 1.0 - drop_ratio - mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device) - mask.bernoulli_(keep_ratio) - x.div_(keep_ratio) - x.mul_(mask) - return x - - -class SE(nn.Module): - """Squeeze-and-Excitation block with Swish.""" - - def __init__(self, in_channels, se_channels): - super(SE, self).__init__() - self.se1 = nn.Conv2d(in_channels, se_channels, kernel_size=1, bias=True) - self.se2 = nn.Conv2d(se_channels, in_channels, kernel_size=1, bias=True) - - def forward(self, x): - out = F.adaptive_avg_pool2d(x, (1, 1)) - out = swish(self.se1(out)) - out = self.se2(out).sigmoid() - out = x * out - return out - - -class Block(nn.Module): - """expansion + depthwise + pointwise + squeeze-excitation""" - - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride, - expand_ratio=1, - se_ratio=0.0, - drop_rate=0.0, - ): - super(Block, self).__init__() - self.stride = stride - self.drop_rate = drop_rate - self.expand_ratio = expand_ratio - - # Expansion - channels = expand_ratio * in_channels - self.conv1 = nn.Conv2d( - in_channels, channels, kernel_size=1, stride=1, padding=0, bias=False - ) - self.bn1 = nn.BatchNorm2d(channels) - - # Depthwise conv - self.conv2 = nn.Conv2d( - channels, - channels, - kernel_size=kernel_size, - stride=stride, - padding=(1 if kernel_size == 3 else 2), - groups=channels, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(channels) - - # SE layers - se_channels = int(in_channels * se_ratio) - self.se = SE(channels, se_channels) - - # Output - self.conv3 = nn.Conv2d( - channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False - ) - self.bn3 = nn.BatchNorm2d(out_channels) - - # Skip connection if in and out shapes are the same (MV-V2 style) - self.has_skip = (stride == 1) and (in_channels == out_channels) - - def forward(self, x): - out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x))) - out = swish(self.bn2(self.conv2(out))) - out = self.se(out) - out = self.bn3(self.conv3(out)) - if self.has_skip: - if self.training and self.drop_rate > 0: - out = drop_connect(out, self.drop_rate) - out = out + x - return out - - -class EfficientNet(nn.Module): - def __init__(self, cfg, num_classes=10): - super(EfficientNet, self).__init__() - self.cfg = cfg - self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(32) - self.layers = self._make_layers(in_channels=32) 
- self.linear = nn.Linear(cfg["out_channels"][-1], num_classes) - - def _make_layers(self, in_channels): - layers = [] - cfg = [ - self.cfg[k] - for k in [ - "expansion", - "out_channels", - "num_blocks", - "kernel_size", - "stride", - ] - ] - b = 0 - blocks = sum(self.cfg["num_blocks"]) - for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg): - strides = [stride] + [1] * (num_blocks - 1) - for stride in strides: - drop_rate = self.cfg["drop_connect_rate"] * b / blocks - layers.append( - Block( - in_channels, - out_channels, - kernel_size, - stride, - expansion, - se_ratio=0.25, - drop_rate=drop_rate, - ) - ) - in_channels = out_channels - return nn.Sequential(*layers) - - def forward(self, x): - out = swish(self.bn1(self.conv1(x))) - out = self.layers(out) - out = F.adaptive_avg_pool2d(out, 1) - out = out.view(out.size(0), -1) - dropout_rate = self.cfg["dropout_rate"] - if self.training and dropout_rate > 0: - out = F.dropout(out, p=dropout_rate) - out = self.linear(out) - return out - - -def EfficientNetB0(): - cfg = { - "num_blocks": [1, 2, 2, 3, 3, 4, 1], - "expansion": [1, 6, 6, 6, 6, 6, 6], - "out_channels": [16, 24, 40, 80, 112, 192, 320], - "kernel_size": [3, 3, 5, 3, 5, 5, 3], - "stride": [1, 2, 2, 2, 1, 2, 1], - "dropout_rate": 0.2, - "drop_connect_rate": 0.2, - } - return EfficientNet(cfg) - - -def test_efficientNetB0(): - load_pytorch_module_and_check( - EfficientNetB0, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_googlenet.py b/examples/x2oneflow/pytorch2oneflow/models/test_googlenet.py deleted file mode 100644 index 90b0b462ef3c61fc0ae4bf2191b3e266bd2b6b93..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_googlenet.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/googlenet.py - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class Inception(nn.Module): - def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): - super(Inception, self).__init__() - # 1x1 conv branch - self.b1 = nn.Sequential( - nn.Conv2d(in_planes, n1x1, kernel_size=1), - nn.BatchNorm2d(n1x1), - nn.ReLU(True), - ) - - # 1x1 conv -> 3x3 conv branch - self.b2 = nn.Sequential( - nn.Conv2d(in_planes, n3x3red, kernel_size=1), - nn.BatchNorm2d(n3x3red), - nn.ReLU(True), - nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), - nn.BatchNorm2d(n3x3), - nn.ReLU(True), - ) - - # 1x1 conv -> 5x5 conv branch - self.b3 = nn.Sequential( - nn.Conv2d(in_planes, n5x5red, kernel_size=1), - nn.BatchNorm2d(n5x5red), - nn.ReLU(True), - nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1), - nn.BatchNorm2d(n5x5), - nn.ReLU(True), - nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1), - nn.BatchNorm2d(n5x5), - nn.ReLU(True), - ) - - # 3x3 pool -> 1x1 conv branch - self.b4 = nn.Sequential( - nn.MaxPool2d(3, stride=1, padding=1), - nn.Conv2d(in_planes, pool_planes, kernel_size=1), - nn.BatchNorm2d(pool_planes), - nn.ReLU(True), - ) - - def forward(self, x): - y1 = self.b1(x) - y2 = self.b2(x) - y3 = self.b3(x) - y4 = self.b4(x) - return torch.cat([y1, y2, y3, y4], 1) - - -class GoogLeNet(nn.Module): - def __init__(self): - super(GoogLeNet, self).__init__() - self.pre_layers = nn.Sequential( - nn.Conv2d(3, 192, kernel_size=3, padding=1), - nn.BatchNorm2d(192), - nn.ReLU(True), - ) - - self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) - self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) - - self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) - - self.a4 = Inception(480, 192, 96, 208, 16, 48, 64) - self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) - self.c4 = Inception(512, 128, 128, 256, 24, 64, 64) - self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) - self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) - - self.a5 = Inception(832, 256, 160, 320, 32, 128, 128) - self.b5 = Inception(832, 384, 192, 384, 48, 128, 128) - - self.avgpool = nn.AvgPool2d(8, stride=1) - self.linear = nn.Linear(1024, 10) - - def forward(self, x): - out = self.pre_layers(x) - out = self.a3(out) - out = self.b3(out) - out = self.maxpool(out) - out = self.a4(out) - out = self.b4(out) - out = self.c4(out) - out = self.d4(out) - out = self.e4(out) - out = self.maxpool(out) - out = self.a5(out) - out = self.b5(out) - out = self.avgpool(out) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def test_googlenet(): - load_pytorch_module_and_check( - GoogLeNet, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_inception.py b/examples/x2oneflow/pytorch2oneflow/models/test_inception.py deleted file mode 100644 index 20fe97f49349121c8751f3b3cec1cdc5fc1cd2da..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_inception.py +++ /dev/null @@ -1,349 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/inceptionv3.py - - -class BasicConv2d(nn.Module): - def __init__(self, input_channels, output_channels, **kwargs): - super().__init__() - self.conv = nn.Conv2d(input_channels, output_channels, bias=False, **kwargs) - self.bn = nn.BatchNorm2d(output_channels) - self.relu = nn.ReLU(inplace=True) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - x = self.relu(x) - - return x - - -# same naive inception module -class InceptionA(nn.Module): - def __init__(self, input_channels, pool_features): - super().__init__() - self.branch1x1 = BasicConv2d(input_channels, 64, kernel_size=1) - - self.branch5x5 = nn.Sequential( - BasicConv2d(input_channels, 48, kernel_size=1), - BasicConv2d(48, 64, kernel_size=5, padding=2), - ) - - self.branch3x3 = nn.Sequential( - BasicConv2d(input_channels, 64, kernel_size=1), - BasicConv2d(64, 96, kernel_size=3, padding=1), - BasicConv2d(96, 96, kernel_size=3, padding=1), - ) - - self.branchpool = nn.Sequential( - nn.AvgPool2d(kernel_size=3, stride=1, padding=1), - BasicConv2d(input_channels, pool_features, kernel_size=3, padding=1), - ) - - def forward(self, x): - - # x -> 1x1(same) - branch1x1 = self.branch1x1(x) - - # x -> 1x1 -> 5x5(same) - branch5x5 = self.branch5x5(x) - # branch5x5 = self.branch5x5_2(branch5x5) - - # x -> 1x1 -> 3x3 -> 3x3(same) - branch3x3 = self.branch3x3(x) - - # x -> pool -> 1x1(same) - branchpool = self.branchpool(x) - - outputs = [branch1x1, branch5x5, branch3x3, branchpool] - - return torch.cat(outputs, 1) - - -# downsample -# Factorization into smaller convolutions -class InceptionB(nn.Module): - def __init__(self, input_channels): - super().__init__() - - self.branch3x3 = BasicConv2d(input_channels, 384, kernel_size=3, stride=2) - - self.branch3x3stack = nn.Sequential( - BasicConv2d(input_channels, 64, kernel_size=1), - BasicConv2d(64, 96, kernel_size=3, padding=1), - BasicConv2d(96, 96, kernel_size=3, stride=2), - ) - - self.branchpool = nn.MaxPool2d(kernel_size=3, stride=2) - - def forward(self, x): - - # x - > 3x3(downsample) - branch3x3 = self.branch3x3(x) - - # x -> 3x3 -> 3x3(downsample) - branch3x3stack = self.branch3x3stack(x) - - # x -> avgpool(downsample) - branchpool = self.branchpool(x) - - # """We can use two parallel stride 2 blocks: P and C. 
P is a pooling - # layer (either average or maximum pooling) the activation, both of - # them are stride 2 the filter banks of which are concatenated as in - # figure 10.""" - outputs = [branch3x3, branch3x3stack, branchpool] - - return torch.cat(outputs, 1) - - -# Factorizing Convolutions with Large Filter Size -class InceptionC(nn.Module): - def __init__(self, input_channels, channels_7x7): - super().__init__() - self.branch1x1 = BasicConv2d(input_channels, 192, kernel_size=1) - - c7 = channels_7x7 - - # In theory, we could go even further and argue that one can replace any n × n - # convolution by a 1 × n convolution followed by a n × 1 convolution and the - # computational cost saving increases dramatically as n grows (see figure 6). - self.branch7x7 = nn.Sequential( - BasicConv2d(input_channels, c7, kernel_size=1), - BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), - BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)), - ) - - self.branch7x7stack = nn.Sequential( - BasicConv2d(input_channels, c7, kernel_size=1), - BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), - BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)), - BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), - BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)), - ) - - self.branch_pool = nn.Sequential( - nn.AvgPool2d(kernel_size=3, stride=1, padding=1), - BasicConv2d(input_channels, 192, kernel_size=1), - ) - - def forward(self, x): - - # x -> 1x1(same) - branch1x1 = self.branch1x1(x) - - # x -> 1layer 1*7 and 7*1 (same) - branch7x7 = self.branch7x7(x) - - # x-> 2layer 1*7 and 7*1(same) - branch7x7stack = self.branch7x7stack(x) - - # x-> avgpool (same) - branchpool = self.branch_pool(x) - - outputs = [branch1x1, branch7x7, branch7x7stack, branchpool] - - return torch.cat(outputs, 1) - - -class InceptionD(nn.Module): - def __init__(self, input_channels): - super().__init__() - - self.branch3x3 = nn.Sequential( - BasicConv2d(input_channels, 192, kernel_size=1), - BasicConv2d(192, 320, kernel_size=3, stride=2), - ) - - self.branch7x7 = nn.Sequential( - BasicConv2d(input_channels, 192, kernel_size=1), - BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3)), - BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0)), - BasicConv2d(192, 192, kernel_size=3, stride=2), - ) - - self.branchpool = nn.AvgPool2d(kernel_size=3, stride=2) - - def forward(self, x): - - # x -> 1x1 -> 3x3(downsample) - branch3x3 = self.branch3x3(x) - - # x -> 1x1 -> 1x7 -> 7x1 -> 3x3 (downsample) - branch7x7 = self.branch7x7(x) - - # x -> avgpool (downsample) - branchpool = self.branchpool(x) - - outputs = [branch3x3, branch7x7, branchpool] - - return torch.cat(outputs, 1) - - -# same -class InceptionE(nn.Module): - def __init__(self, input_channels): - super().__init__() - self.branch1x1 = BasicConv2d(input_channels, 320, kernel_size=1) - - self.branch3x3_1 = BasicConv2d(input_channels, 384, kernel_size=1) - self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) - self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) - - self.branch3x3stack_1 = BasicConv2d(input_channels, 448, kernel_size=1) - self.branch3x3stack_2 = BasicConv2d(448, 384, kernel_size=3, padding=1) - self.branch3x3stack_3a = BasicConv2d( - 384, 384, kernel_size=(1, 3), padding=(0, 1) - ) - self.branch3x3stack_3b = BasicConv2d( - 384, 384, kernel_size=(3, 1), padding=(1, 0) - ) - - self.branch_pool = nn.Sequential( - nn.AvgPool2d(kernel_size=3, stride=1, padding=1), - BasicConv2d(input_channels, 192, 
kernel_size=1), - ) - - def forward(self, x): - - # x -> 1x1 (same) - branch1x1 = self.branch1x1(x) - - # x -> 1x1 -> 3x1 - # x -> 1x1 -> 1x3 - # concatenate(3x1, 1x3) - # """7. Inception modules with expanded the filter bank outputs. - # This architecture is used on the coarsest (8 × 8) grids to promote - # high dimensional representations, as suggested by principle - # 2 of Section 2.""" - branch3x3 = self.branch3x3_1(x) - branch3x3 = [self.branch3x3_2a(branch3x3), self.branch3x3_2b(branch3x3)] - branch3x3 = torch.cat(branch3x3, 1) - - # x -> 1x1 -> 3x3 -> 1x3 - # x -> 1x1 -> 3x3 -> 3x1 - # concatenate(1x3, 3x1) - branch3x3stack = self.branch3x3stack_1(x) - branch3x3stack = self.branch3x3stack_2(branch3x3stack) - branch3x3stack = [ - self.branch3x3stack_3a(branch3x3stack), - self.branch3x3stack_3b(branch3x3stack), - ] - branch3x3stack = torch.cat(branch3x3stack, 1) - - branchpool = self.branch_pool(x) - - outputs = [branch1x1, branch3x3, branch3x3stack, branchpool] - - return torch.cat(outputs, 1) - - -class InceptionV3(nn.Module): - def __init__(self, num_classes=100): - super().__init__() - self.Conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3, padding=1) - self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3, padding=1) - self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1) - self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1) - self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3) - - # naive inception module - self.Mixed_5b = InceptionA(192, pool_features=32) - self.Mixed_5c = InceptionA(256, pool_features=64) - self.Mixed_5d = InceptionA(288, pool_features=64) - - # downsample - self.Mixed_6a = InceptionB(288) - - self.Mixed_6b = InceptionC(768, channels_7x7=128) - self.Mixed_6c = InceptionC(768, channels_7x7=160) - self.Mixed_6d = InceptionC(768, channels_7x7=160) - self.Mixed_6e = InceptionC(768, channels_7x7=192) - - # downsample - self.Mixed_7a = InceptionD(768) - - self.Mixed_7b = InceptionE(1280) - self.Mixed_7c = InceptionE(2048) - - # 6*6 feature size - self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - self.dropout = nn.Dropout2d() - self.linear = nn.Linear(2048, num_classes) - - def forward(self, x): - - # 32 -> 30 - x = self.Conv2d_1a_3x3(x) - x = self.Conv2d_2a_3x3(x) - x = self.Conv2d_2b_3x3(x) - x = self.Conv2d_3b_1x1(x) - x = self.Conv2d_4a_3x3(x) - - # 30 -> 30 - x = self.Mixed_5b(x) - x = self.Mixed_5c(x) - x = self.Mixed_5d(x) - - # 30 -> 14 - # Efficient Grid Size Reduction to avoid representation - # bottleneck - x = self.Mixed_6a(x) - - # 14 -> 14 - # """In practice, we have found that employing this factorization does not - # work well on early layers, but it gives very good results on medium - # grid-sizes (On m × m feature maps, where m ranges between 12 and 20). 
- # On that level, very good results can be achieved by using 1 × 7 convolutions - # followed by 7 × 1 convolutions.""" - x = self.Mixed_6b(x) - x = self.Mixed_6c(x) - x = self.Mixed_6d(x) - x = self.Mixed_6e(x) - - # 14 -> 6 - # Efficient Grid Size Reduction - x = self.Mixed_7a(x) - - # 6 -> 6 - # We are using this solution only on the coarsest grid, - # since that is the place where producing high dimensional - # sparse representation is the most critical as the ratio of - # local processing (by 1 × 1 convolutions) is increased compared - # to the spatial aggregation.""" - x = self.Mixed_7b(x) - x = self.Mixed_7c(x) - - # 6 -> 1 - x = self.avgpool(x) - x = self.dropout(x) - x = x.view(x.size(0), -1) - x = self.linear(x) - return x - - -def test_inception_v3(): - load_pytorch_module_and_check( - InceptionV3, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_lenet.py b/examples/x2oneflow/pytorch2oneflow/models/test_lenet.py deleted file mode 100644 index d4bc02b6b2630c28421de207be705a2c1f72825e..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_lenet.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/lenet.py - -'''LeNet in PyTorch.''' -import torch.nn as nn -import torch.nn.functional as F - -class LeNet(nn.Module): - def __init__(self): - super(LeNet, self).__init__() - self.conv1 = nn.Conv2d(3, 6, 5) - self.conv2 = nn.Conv2d(6, 16, 5) - self.fc1 = nn.Linear(16*5*5, 120) - self.fc2 = nn.Linear(120, 84) - self.fc3 = nn.Linear(84, 10) - - def forward(self, x): - out = F.relu(self.conv1(x)) - out = F.max_pool2d(out, 2) - out = F.relu(self.conv2(out)) - out = F.max_pool2d(out, 2) - out = out.view(out.size(0), -1) - out = F.relu(self.fc1(out)) - out = F.relu(self.fc2(out)) - out = self.fc3(out) - return out - -def test_lenet(): - load_pytorch_module_and_check( - LeNet, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_mobilenet_v1.py b/examples/x2oneflow/pytorch2oneflow/models/test_mobilenet_v1.py deleted file mode 100644 index 738e420079cec09c3906be7fec23f03a315f398c..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_mobilenet_v1.py +++ /dev/null @@ -1,164 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/mobilenet.py - -import torch -import torch.nn as nn - - -class DepthSeperabelConv2d(nn.Module): - def __init__(self, input_channels, output_channels, kernel_size, **kwargs): - super().__init__() - self.depthwise = nn.Sequential( - nn.Conv2d( - input_channels, - input_channels, - kernel_size, - groups=input_channels, - **kwargs - ), - nn.BatchNorm2d(input_channels), - nn.ReLU(inplace=True), - ) - - self.pointwise = nn.Sequential( - nn.Conv2d(input_channels, output_channels, 1), - nn.BatchNorm2d(output_channels), - nn.ReLU(inplace=True), - ) - - def forward(self, x): - x = self.depthwise(x) - x = self.pointwise(x) - - return x - - -class BasicConv2d(nn.Module): - def __init__(self, input_channels, output_channels, kernel_size, **kwargs): - - super().__init__() - self.conv = nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs) - self.bn = nn.BatchNorm2d(output_channels) - self.relu = nn.ReLU(inplace=True) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - x = self.relu(x) - - return x - - -class MobileNetV1(nn.Module): - - """ - Args: - width multiplier: The role of the width multiplier α is to thin - a network uniformly at each layer. For a given - layer and width multiplier α, the number of - input channels M becomes αM and the number of - output channels N becomes αN. 
- """ - - def __init__(self, width_multiplier=1, class_num=100): - super().__init__() - - alpha = width_multiplier - self.stem = nn.Sequential( - BasicConv2d(3, int(32 * alpha), 3, padding=1, bias=False), - DepthSeperabelConv2d( - int(32 * alpha), int(64 * alpha), 3, padding=1, bias=False - ), - ) - - # downsample - self.conv1 = nn.Sequential( - DepthSeperabelConv2d( - int(64 * alpha), int(128 * alpha), 3, stride=2, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(128 * alpha), int(128 * alpha), 3, padding=1, bias=False - ), - ) - - # downsample - self.conv2 = nn.Sequential( - DepthSeperabelConv2d( - int(128 * alpha), int(256 * alpha), 3, stride=2, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(256 * alpha), int(256 * alpha), 3, padding=1, bias=False - ), - ) - - # downsample - self.conv3 = nn.Sequential( - DepthSeperabelConv2d( - int(256 * alpha), int(512 * alpha), 3, stride=2, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False - ), - ) - - # downsample - self.conv4 = nn.Sequential( - DepthSeperabelConv2d( - int(512 * alpha), int(1024 * alpha), 3, stride=2, padding=1, bias=False - ), - DepthSeperabelConv2d( - int(1024 * alpha), int(1024 * alpha), 3, padding=1, bias=False - ), - ) - - self.fc = nn.Linear(int(1024 * alpha), class_num) - self.avg = nn.AdaptiveAvgPool2d(1) - - def forward(self, x): - x = self.stem(x) - - x = self.conv1(x) - x = self.conv2(x) - x = self.conv3(x) - x = self.conv4(x) - - x = self.avg(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - return x - - -def test_mobilenet_v1(): - load_pytorch_module_and_check( - MobileNetV1, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_mobilenet_v2.py b/examples/x2oneflow/pytorch2oneflow/models/test_mobilenet_v2.py deleted file mode 100644 index 04de91bdf6e927886f41ca26d5cedb8e14eadf8e..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_mobilenet_v2.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_mobilenet_v2(): - load_pytorch_module_and_check( - torchvision.models.mobilenet_v2, - input_size=(1, 3, 224, 224), - input_min_val=0, - input_max_val=1, - train_flag=False, - flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_mobilenet_v3.py b/examples/x2oneflow/pytorch2oneflow/models/test_mobilenet_v3.py deleted file mode 100644 index 5740899b49ecaf800b3567ff0e6643524d4b7112..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_mobilenet_v3.py +++ /dev/null @@ -1,237 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.nn import init - -# https://github.com/xiaolai-sqlai/mobilenetv3/blob/master/mobilenetv3.py - - -class hswish(nn.Module): - def forward(self, x): - out = x * F.relu6(x + 3, inplace=True) / 6 - return out - - -class hsigmoid(nn.Module): - def forward(self, x): - out = F.relu6(x + 3, inplace=True) / 6 - return out - - -class SeModule(nn.Module): - def __init__(self, in_size, reduction=4): - super(SeModule, self).__init__() - self.se = nn.Sequential( - nn.AdaptiveAvgPool2d(1), - nn.Conv2d( - in_size, - in_size // reduction, - kernel_size=1, - stride=1, - padding=0, - bias=False, - ), - nn.BatchNorm2d(in_size // reduction), - nn.ReLU(inplace=True), - nn.Conv2d( - in_size // reduction, - in_size, - kernel_size=1, - stride=1, - padding=0, - bias=False, - ), - nn.BatchNorm2d(in_size), - hsigmoid(), - ) - - def forward(self, x): - return x * self.se(x) - - -class Block(nn.Module): - """expand + depthwise + pointwise""" - - def __init__( - self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride - ): - super(Block, self).__init__() - self.stride = stride - self.se = semodule - - self.conv1 = nn.Conv2d( - in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False - ) - self.bn1 = nn.BatchNorm2d(expand_size) - self.nolinear1 = nolinear - self.conv2 = nn.Conv2d( - expand_size, - expand_size, - kernel_size=kernel_size, - stride=stride, - padding=kernel_size // 2, - groups=expand_size, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(expand_size) - self.nolinear2 = nolinear - self.conv3 = nn.Conv2d( - expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False - ) - self.bn3 = nn.BatchNorm2d(out_size) - - self.shortcut = nn.Sequential() - if stride == 1 and in_size != out_size: - self.shortcut = nn.Sequential( - nn.Conv2d( - in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False - ), - nn.BatchNorm2d(out_size), - ) - - def forward(self, x): - out = self.nolinear1(self.bn1(self.conv1(x))) - out = self.nolinear2(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - if self.se != None: - out = self.se(out) - out = out + 
self.shortcut(x) if self.stride == 1 else out - return out - - -class MobileNetV3_Large(nn.Module): - def __init__(self, num_classes=1000): - super(MobileNetV3_Large, self).__init__() - self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(16) - self.hs1 = hswish() - - self.bneck = nn.Sequential( - Block(3, 16, 16, 16, nn.ReLU(inplace=True), None, 1), - Block(3, 16, 64, 24, nn.ReLU(inplace=True), None, 2), - Block(3, 24, 72, 24, nn.ReLU(inplace=True), None, 1), - Block(5, 24, 72, 40, nn.ReLU(inplace=True), SeModule(40), 2), - Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1), - Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1), - Block(3, 40, 240, 80, hswish(), None, 2), - Block(3, 80, 200, 80, hswish(), None, 1), - Block(3, 80, 184, 80, hswish(), None, 1), - Block(3, 80, 184, 80, hswish(), None, 1), - Block(3, 80, 480, 112, hswish(), SeModule(112), 1), - Block(3, 112, 672, 112, hswish(), SeModule(112), 1), - Block(5, 112, 672, 160, hswish(), SeModule(160), 1), - Block(5, 160, 672, 160, hswish(), SeModule(160), 2), - Block(5, 160, 960, 160, hswish(), SeModule(160), 1), - ) - - self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False) - self.bn2 = nn.BatchNorm2d(960) - self.hs2 = hswish() - self.linear3 = nn.Linear(960, 1280) - self.bn3 = nn.BatchNorm1d(1280) - self.hs3 = hswish() - self.linear4 = nn.Linear(1280, num_classes) - self.init_params() - - def init_params(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - init.kaiming_normal_(m.weight, mode="fan_out") - if m.bias is not None: - init.constant_(m.bias, 0) - elif isinstance(m, nn.BatchNorm2d): - init.constant_(m.weight, 1) - init.constant_(m.bias, 0) - elif isinstance(m, nn.Linear): - init.normal_(m.weight, std=0.001) - if m.bias is not None: - init.constant_(m.bias, 0) - - def forward(self, x): - out = self.hs1(self.bn1(self.conv1(x))) - out = self.bneck(out) - out = self.hs2(self.bn2(self.conv2(out))) - out = F.avg_pool2d(out, 7) - out = out.view(out.size(0), -1) - out = self.hs3(self.bn3(self.linear3(out))) - out = self.linear4(out) - return out - - -class MobileNetV3_Small(nn.Module): - def __init__(self, num_classes=1000): - super(MobileNetV3_Small, self).__init__() - self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(16) - self.hs1 = hswish() - - self.bneck = nn.Sequential( - Block(3, 16, 16, 16, nn.ReLU(inplace=True), SeModule(16), 2), - Block(3, 16, 72, 24, nn.ReLU(inplace=True), None, 2), - Block(3, 24, 88, 24, nn.ReLU(inplace=True), None, 1), - Block(5, 24, 96, 40, hswish(), SeModule(40), 2), - Block(5, 40, 240, 40, hswish(), SeModule(40), 1), - Block(5, 40, 240, 40, hswish(), SeModule(40), 1), - Block(5, 40, 120, 48, hswish(), SeModule(48), 1), - Block(5, 48, 144, 48, hswish(), SeModule(48), 1), - Block(5, 48, 288, 96, hswish(), SeModule(96), 2), - Block(5, 96, 576, 96, hswish(), SeModule(96), 1), - Block(5, 96, 576, 96, hswish(), SeModule(96), 1), - ) - - self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False) - self.bn2 = nn.BatchNorm2d(576) - self.hs2 = hswish() - self.linear3 = nn.Linear(576, 1280) - self.bn3 = nn.BatchNorm1d(1280) - self.hs3 = hswish() - self.linear4 = nn.Linear(1280, num_classes) - self.init_params() - - def init_params(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - init.kaiming_normal_(m.weight, mode="fan_out") - if m.bias is not None: - init.constant_(m.bias, 0) - elif isinstance(m, 
nn.BatchNorm2d): - init.constant_(m.weight, 1) - init.constant_(m.bias, 0) - elif isinstance(m, nn.Linear): - init.normal_(m.weight, std=0.001) - if m.bias is not None: - init.constant_(m.bias, 0) - - def forward(self, x): - out = self.hs1(self.bn1(self.conv1(x))) - out = self.bneck(out) - out = self.hs2(self.bn2(self.conv2(out))) - out = F.avg_pool2d(out, 7) - out = out.view(out.size(0), -1) - out = self.hs3(self.bn3(self.linear3(out))) - out = self.linear4(out) - return out - - -def test_MobileNetV3_Large(): - load_pytorch_module_and_check( - MobileNetV3_Large, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_pnasnet.py b/examples/x2oneflow/pytorch2oneflow/models/test_pnasnet.py deleted file mode 100644 index c9bc63f114aa545a605ad2ec55621168261a7409..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_pnasnet.py +++ /dev/null @@ -1,141 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/pnasnet.py - -'''PNASNet in PyTorch. 
-Paper: Progressive Neural Architecture Search -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class SepConv(nn.Module): - '''Separable Convolution.''' - def __init__(self, in_planes, out_planes, kernel_size, stride): - super(SepConv, self).__init__() - self.conv1 = nn.Conv2d(in_planes, out_planes, - kernel_size, stride, - padding=(kernel_size-1)//2, - bias=False, groups=in_planes) - self.bn1 = nn.BatchNorm2d(out_planes) - - def forward(self, x): - return self.bn1(self.conv1(x)) - - -class CellA(nn.Module): - def __init__(self, in_planes, out_planes, stride=1): - super(CellA, self).__init__() - self.stride = stride - self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) - if stride==2: - self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn1 = nn.BatchNorm2d(out_planes) - - def forward(self, x): - y1 = self.sep_conv1(x) - y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) - if self.stride==2: - y2 = self.bn1(self.conv1(y2)) - return F.relu(y1+y2) - -class CellB(nn.Module): - def __init__(self, in_planes, out_planes, stride=1): - super(CellB, self).__init__() - self.stride = stride - # Left branch - self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) - self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride) - # Right branch - self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride) - if stride==2: - self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn1 = nn.BatchNorm2d(out_planes) - # Reduce channels - self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) - self.bn2 = nn.BatchNorm2d(out_planes) - - def forward(self, x): - # Left branch - y1 = self.sep_conv1(x) - y2 = self.sep_conv2(x) - # Right branch - y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) - if self.stride==2: - y3 = self.bn1(self.conv1(y3)) - y4 = self.sep_conv3(x) - # Concat & reduce channels - b1 = F.relu(y1+y2) - b2 = F.relu(y3+y4) - y = torch.cat([b1,b2], 1) - return F.relu(self.bn2(self.conv2(y))) - -class PNASNet(nn.Module): - def __init__(self, cell_type, num_cells, num_planes): - super(PNASNet, self).__init__() - self.in_planes = num_planes - self.cell_type = cell_type - - self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(num_planes) - - self.layer1 = self._make_layer(num_planes, num_cells=6) - self.layer2 = self._downsample(num_planes*2) - self.layer3 = self._make_layer(num_planes*2, num_cells=6) - self.layer4 = self._downsample(num_planes*4) - self.layer5 = self._make_layer(num_planes*4, num_cells=6) - - self.linear = nn.Linear(num_planes*4, 10) - - def _make_layer(self, planes, num_cells): - layers = [] - for _ in range(num_cells): - layers.append(self.cell_type(self.in_planes, planes, stride=1)) - self.in_planes = planes - return nn.Sequential(*layers) - - def _downsample(self, planes): - layer = self.cell_type(self.in_planes, planes, stride=2) - self.in_planes = planes - return layer - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = self.layer5(out) - out = F.avg_pool2d(out, 8) - out = self.linear(out.view(out.size(0), -1)) - return out - - -def PNASNetA(): - return PNASNet(CellA, num_cells=6, num_planes=44) - -def PNASNetB(): - 
return PNASNet(CellB, num_cells=6, num_planes=32) - -def test_pnasnet(): - load_pytorch_module_and_check( - PNASNetA, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_preact_resnet.py b/examples/x2oneflow/pytorch2oneflow/models/test_preact_resnet.py deleted file mode 100644 index 642324d7a2e8617eee1159b84e72ace191daaa25..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_preact_resnet.py +++ /dev/null @@ -1,133 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -'''Pre-activation ResNet in PyTorch. -Reference: -[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Identity Mappings in Deep Residual Networks. arXiv:1603.05027 -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class PreActBlock(nn.Module): - '''Pre-activation version of the BasicBlock.''' - expansion = 1 - - def __init__(self, in_planes, planes, stride=1): - super(PreActBlock, self).__init__() - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) - - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) - ) - - def forward(self, x): - out = F.relu(self.bn1(x)) - shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x - out = self.conv1(out) - out = self.conv2(F.relu(self.bn2(out))) - out += shortcut - return out - - -class PreActBottleneck(nn.Module): - '''Pre-activation version of the original Bottleneck module.''' - expansion = 4 - - def __init__(self, in_planes, planes, stride=1): - super(PreActBottleneck, self).__init__() - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn3 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) - - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) - ) - - def forward(self, x): - out = F.relu(self.bn1(x)) - shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x - out = self.conv1(out) - out = self.conv2(F.relu(self.bn2(out))) - out = self.conv3(F.relu(self.bn3(out))) - out += shortcut - return out - - -class PreActResNet(nn.Module): - def __init__(self, block, num_blocks, num_classes=10): - super(PreActResNet, self).__init__() - self.in_planes = 64 - - self.conv1 = 
nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) - self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) - self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) - self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) - self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) - self.linear = nn.Linear(512*block.expansion, num_classes) - - def _make_layer(self, block, planes, num_blocks, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for stride in strides: - layers.append(block(self.in_planes, planes, stride)) - self.in_planes = planes * block.expansion - return nn.Sequential(*layers) - - def forward(self, x): - out = self.conv1(x) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def PreActResNet18(): - return PreActResNet(PreActBlock, [2,2,2,2]) - -def PreActResNet34(): - return PreActResNet(PreActBlock, [3,4,6,3]) - -def PreActResNet50(): - return PreActResNet(PreActBottleneck, [3,4,6,3]) - -def PreActResNet101(): - return PreActResNet(PreActBottleneck, [3,4,23,3]) - -def PreActResNet152(): - return PreActResNet(PreActBottleneck, [3,8,36,3]) - -def test_preact_resnet(): - load_pytorch_module_and_check( - PreActResNet18, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_regnet.py b/examples/x2oneflow/pytorch2oneflow/models/test_regnet.py deleted file mode 100644 index 712e61fb8ae0ac2adcf7e631789d232bf44d8e77..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_regnet.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/regnet.py - - -class SE(nn.Module): - """Squeeze-and-Excitation block.""" - - def __init__(self, in_planes, se_planes): - super(SE, self).__init__() - self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True) - self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True) - - def forward(self, x): - out = F.adaptive_avg_pool2d(x, (1, 1)) - out = F.relu(self.se1(out)) - out = self.se2(out).sigmoid() - out = x * out - return out - - -class Block(nn.Module): - def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio): - super(Block, self).__init__() - # 1x1 - w_b = int(round(w_out * bottleneck_ratio)) - self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(w_b) - # 3x3 - num_groups = w_b // group_width - self.conv2 = nn.Conv2d( - w_b, - w_b, - kernel_size=3, - stride=stride, - padding=1, - groups=num_groups, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(w_b) - # se - self.with_se = se_ratio > 0 - if self.with_se: - w_se = int(round(w_in * se_ratio)) - self.se = SE(w_b, w_se) - # 1x1 - self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(w_out) - - self.shortcut = nn.Sequential() - if stride != 1 or w_in != w_out: - self.shortcut = nn.Sequential( - nn.Conv2d(w_in, w_out, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(w_out), - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - if self.with_se: - out = self.se(out) - out = self.bn3(self.conv3(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class RegNet(nn.Module): - def __init__(self, cfg, num_classes=10): - super(RegNet, self).__init__() - self.cfg = cfg - self.in_planes = 64 - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.layer1 = self._make_layer(0) - self.layer2 = self._make_layer(1) - self.layer3 = self._make_layer(2) - self.layer4 = self._make_layer(3) - self.linear = nn.Linear(self.cfg["widths"][-1], num_classes) - - def _make_layer(self, idx): - depth = self.cfg["depths"][idx] - width = self.cfg["widths"][idx] - stride = self.cfg["strides"][idx] - group_width = self.cfg["group_width"] - bottleneck_ratio = self.cfg["bottleneck_ratio"] - se_ratio = self.cfg["se_ratio"] - - layers = [] - for i in range(depth): - s = stride if i == 0 else 1 - layers.append( - Block(self.in_planes, width, s, group_width, bottleneck_ratio, se_ratio) - ) - self.in_planes = width - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = F.adaptive_avg_pool2d(out, (1, 1)) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def RegNetX_200MF(): - cfg = { - "depths": [1, 1, 4, 7], - "widths": [24, 56, 152, 368], - "strides": [1, 1, 2, 2], - "group_width": 8, - "bottleneck_ratio": 1, - "se_ratio": 0, - } - return RegNet(cfg) - - -def RegNetX_400MF(): - cfg = { - "depths": [1, 2, 7, 12], - "widths": [32, 64, 160, 384], - "strides": [1, 1, 2, 2], - "group_width": 16, - "bottleneck_ratio": 1, - "se_ratio": 0, - } - return RegNet(cfg) - - -def RegNetY_400MF(): - cfg = { - "depths": [1, 2, 7, 
12], - "widths": [32, 64, 160, 384], - "strides": [1, 1, 2, 2], - "group_width": 16, - "bottleneck_ratio": 1, - "se_ratio": 0.25, - } - return RegNet(cfg) - - -def test_regnet(): - load_pytorch_module_and_check( - RegNetX_200MF, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_resnet18.py b/examples/x2oneflow/pytorch2oneflow/models/test_resnet18.py deleted file mode 100644 index ca6af02b238dc50e48224743f176ad814bdab762..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_resnet18.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_resnet18(): - load_pytorch_module_and_check( - torchvision.models.resnet18, - input_size=(1, 3, 224, 224), - train_flag=False, - flow_weight_dir="/tmp/oneflow" - ) - diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_resnext.py b/examples/x2oneflow/pytorch2oneflow/models/test_resnext.py deleted file mode 100644 index e1ab4aeedf24d3546d10fbfa5e3e7b93e11bfbee..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_resnext.py +++ /dev/null @@ -1,132 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnext.py - - -class Block(nn.Module): - """Grouped convolution block.""" - - expansion = 2 - - def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1): - super(Block, self).__init__() - group_width = cardinality * bottleneck_width - self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(group_width) - self.conv2 = nn.Conv2d( - group_width, - group_width, - kernel_size=3, - stride=stride, - padding=1, - groups=cardinality, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(group_width) - self.conv3 = nn.Conv2d( - group_width, self.expansion * group_width, kernel_size=1, bias=False - ) - self.bn3 = nn.BatchNorm2d(self.expansion * group_width) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion * group_width: - self.shortcut = nn.Sequential( - nn.Conv2d( - in_planes, - self.expansion * group_width, - kernel_size=1, - stride=stride, - bias=False, - ), - nn.BatchNorm2d(self.expansion * group_width), - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class ResNeXt(nn.Module): - def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10): - super(ResNeXt, self).__init__() - self.cardinality = cardinality - self.bottleneck_width = bottleneck_width - self.in_planes = 64 - - self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.layer1 = self._make_layer(num_blocks[0], 1) - self.layer2 = self._make_layer(num_blocks[1], 2) - self.layer3 = self._make_layer(num_blocks[2], 2) - # self.layer4 = self._make_layer(num_blocks[3], 2) - self.linear = nn.Linear(cardinality * bottleneck_width * 8, num_classes) - - def _make_layer(self, num_blocks, stride): - strides = [stride] + [1] * (num_blocks - 1) - layers = [] - for stride in strides: - layers.append( - Block(self.in_planes, self.cardinality, self.bottleneck_width, stride) - ) - self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width - # Increase bottleneck_width by 2 after each stage. 
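-        # Doubling here means the last stage emits expansion * cardinality * (4 * bottleneck_width)
-        # = cardinality * bottleneck_width * 8 channels, the in_features of self.linear.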
- self.bottleneck_width *= 2 - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - # out = self.layer4(out) - out = F.avg_pool2d(out, 8) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def ResNeXt29_2x64d(): - return ResNeXt(num_blocks=[3, 3, 3], cardinality=2, bottleneck_width=64) - - -def ResNeXt29_4x64d(): - return ResNeXt(num_blocks=[3, 3, 3], cardinality=4, bottleneck_width=64) - - -def ResNeXt29_8x64d(): - return ResNeXt(num_blocks=[3, 3, 3], cardinality=8, bottleneck_width=64) - - -def ResNeXt29_32x4d(): - return ResNeXt(num_blocks=[3, 3, 3], cardinality=32, bottleneck_width=4) - - -def test_resnext(): - load_pytorch_module_and_check( - ResNeXt29_2x64d, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_senet.py b/examples/x2oneflow/pytorch2oneflow/models/test_senet.py deleted file mode 100644 index e5464661e59d01e6ad00f669ed61ca71d6d42f79..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_senet.py +++ /dev/null @@ -1,197 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/senet.py - - -class BasicResidualSEBlock(nn.Module): - - expansion = 1 - - def __init__(self, in_channels, out_channels, stride, r=16): - super().__init__() - - self.residual = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1), - nn.BatchNorm2d(out_channels), - nn.ReLU(inplace=True), - nn.Conv2d(out_channels, out_channels * self.expansion, 3, padding=1), - nn.BatchNorm2d(out_channels * self.expansion), - nn.ReLU(inplace=True), - ) - - self.shortcut = nn.Sequential() - if stride != 1 or in_channels != out_channels * self.expansion: - self.shortcut = nn.Sequential( - nn.Conv2d(in_channels, out_channels * self.expansion, 1, stride=stride), - nn.BatchNorm2d(out_channels * self.expansion), - ) - - self.squeeze = nn.AdaptiveAvgPool2d(1) - self.excitation = nn.Sequential( - nn.Linear( - out_channels * self.expansion, out_channels * self.expansion // r - ), - nn.ReLU(inplace=True), - nn.Linear( - out_channels * self.expansion // r, out_channels * self.expansion - ), - nn.Sigmoid(), - ) - - def forward(self, x): - shortcut = self.shortcut(x) - residual = self.residual(x) - - squeeze = self.squeeze(residual) - squeeze = squeeze.view(squeeze.size(0), -1) - excitation = self.excitation(squeeze) - excitation = excitation.view(residual.size(0), residual.size(1), 1, 1) - - x = residual * excitation.expand_as(residual) + shortcut - - return F.relu(x) - - -class BottleneckResidualSEBlock(nn.Module): - - expansion = 4 - - def __init__(self, in_channels, out_channels, stride, r=16): - super().__init__() - - self.residual = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1), - nn.BatchNorm2d(out_channels), - nn.ReLU(inplace=True), - nn.Conv2d(out_channels, out_channels, 3, stride=stride, padding=1), - nn.BatchNorm2d(out_channels), - nn.ReLU(inplace=True), - nn.Conv2d(out_channels, out_channels * self.expansion, 1), - nn.BatchNorm2d(out_channels * self.expansion), - nn.ReLU(inplace=True), - ) - - self.squeeze = nn.AdaptiveAvgPool2d(1) - self.excitation = nn.Sequential( - nn.Linear( - out_channels * self.expansion, out_channels * self.expansion // r - ), - nn.ReLU(inplace=True), - nn.Linear( - out_channels * self.expansion // r, out_channels * self.expansion - ), - nn.Sigmoid(), - ) - - self.shortcut = nn.Sequential() - if stride != 1 or in_channels != out_channels * self.expansion: - self.shortcut = nn.Sequential( - nn.Conv2d(in_channels, out_channels * self.expansion, 1, stride=stride), - nn.BatchNorm2d(out_channels * self.expansion), - ) - - def forward(self, x): - - shortcut = self.shortcut(x) - - residual = self.residual(x) - squeeze = self.squeeze(residual) - squeeze = squeeze.view(squeeze.size(0), -1) - excitation = self.excitation(squeeze) - excitation = excitation.view(residual.size(0), residual.size(1), 1, 1) - - x = residual * excitation.expand_as(residual) + shortcut - - return F.relu(x) - - -class SEResNet(nn.Module): - def __init__(self, block, block_num, class_num=100): - super().__init__() - - self.in_channels = 64 - - self.pre = nn.Sequential( - nn.Conv2d(3, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True) - ) - - self.stage1 = self._make_stage(block, block_num[0], 64, 1) - self.stage2 = self._make_stage(block, block_num[1], 128, 2) - self.stage3 = self._make_stage(block, block_num[2], 256, 2) - self.stage4 
= self._make_stage(block, block_num[3], 512, 2) - - self.linear = nn.Linear(self.in_channels, class_num) - - def forward(self, x): - x = self.pre(x) - - x = self.stage1(x) - x = self.stage2(x) - x = self.stage3(x) - x = self.stage4(x) - - x = F.adaptive_avg_pool2d(x, 1) - x = x.view(x.size(0), -1) - - x = self.linear(x) - - return x - - def _make_stage(self, block, num, out_channels, stride): - - layers = [] - layers.append(block(self.in_channels, out_channels, stride)) - self.in_channels = out_channels * block.expansion - - while num - 1: - layers.append(block(self.in_channels, out_channels, 1)) - num -= 1 - - return nn.Sequential(*layers) - - -def seresnet18(): - return SEResNet(BasicResidualSEBlock, [2, 2, 2, 2]) - - -def seresnet34(): - return SEResNet(BasicResidualSEBlock, [3, 4, 6, 3]) - - -def seresnet50(): - return SEResNet(BottleneckResidualSEBlock, [3, 4, 6, 3]) - - -def seresnet101(): - return SEResNet(BottleneckResidualSEBlock, [3, 4, 23, 3]) - - -def seresnet152(): - return SEResNet(BottleneckResidualSEBlock, [3, 8, 36, 3]) - - -def test_senet(): - load_pytorch_module_and_check( - seresnet18, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_shufflenet_v1.py b/examples/x2oneflow/pytorch2oneflow/models/test_shufflenet_v1.py deleted file mode 100644 index 29cb95c2e224b90f1eea641d191e923509eec651..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_shufflenet_v1.py +++ /dev/null @@ -1,136 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class ShuffleBlock(nn.Module): - def __init__(self, groups): - super(ShuffleBlock, self).__init__() - self.groups = groups - - def forward(self, x): - """Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]""" - N, C, H, W = x.size() - g = self.groups - return x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W) - - -class Bottleneck(nn.Module): - def __init__(self, in_planes, out_planes, stride, groups): - super(Bottleneck, self).__init__() - self.stride = stride - - mid_planes = out_planes // 4 - g = 1 if in_planes == 24 else groups - - self.conv1 = nn.Conv2d( - in_planes, mid_planes, kernel_size=1, groups=g, bias=False - ) - self.bn1 = nn.BatchNorm2d(mid_planes) - self.shuffle1 = ShuffleBlock(groups=g) - self.conv2 = nn.Conv2d( - mid_planes, - mid_planes, - kernel_size=3, - stride=stride, - padding=1, - groups=mid_planes, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(mid_planes) - self.conv3 = nn.Conv2d( - mid_planes, out_planes, kernel_size=1, groups=groups, bias=False - ) - self.bn3 = nn.BatchNorm2d(out_planes) - - self.shortcut = nn.Sequential() - if stride == 2: - self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1)) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.shuffle1(out) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - res = self.shortcut(x) - out = ( - F.relu(torch.cat([out, res], 1)) if self.stride == 2 else F.relu(out + res) - ) - return out - - -class ShuffleNet(nn.Module): - def __init__(self, cfg): - super(ShuffleNet, self).__init__() - out_planes = cfg["out_planes"] - num_blocks = cfg["num_blocks"] - groups = cfg["groups"] - - self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(24) - self.in_planes = 24 - self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups) - self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups) - self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups) - self.linear = nn.Linear(out_planes[2], 10) - - def _make_layer(self, out_planes, num_blocks, groups): - layers = [] - for i in range(num_blocks): - stride = 2 if i == 0 else 1 - cat_planes = self.in_planes if i == 0 else 0 - layers.append( - Bottleneck( - self.in_planes, - out_planes - cat_planes, - stride=stride, - groups=groups, - ) - ) - self.in_planes = out_planes - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def ShuffleNetG2(): - cfg = {"out_planes": [200, 400, 800], "num_blocks": [4, 8, 4], "groups": 2} - return ShuffleNet(cfg) - - -def ShuffleNetG3(): - cfg = {"out_planes": [240, 480, 960], "num_blocks": [4, 8, 4], "groups": 3} - return ShuffleNet(cfg) - - -def test_shufflenet_v1_g2(): - load_pytorch_module_and_check( - ShuffleNetG2, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_shufflenet_v2.py b/examples/x2oneflow/pytorch2oneflow/models/test_shufflenet_v2.py deleted file mode 100644 index 59752032623986478281a2d1577b13adaa1df191..0000000000000000000000000000000000000000 --- 
a/examples/x2oneflow/pytorch2oneflow/models/test_shufflenet_v2.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - -import torch -import torch.nn as nn -import torch.nn.functional as F - -# https://github.com/kuangliu/pytorch-cifar/blob/master/models/shufflenetv2.py - - -class ShuffleBlock(nn.Module): - def __init__(self, groups=2): - super(ShuffleBlock, self).__init__() - self.groups = groups - - def forward(self, x): - """Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]""" - N, C, H, W = x.size() - g = self.groups - return x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W) - - -class SplitBlock(nn.Module): - def __init__(self, ratio): - super(SplitBlock, self).__init__() - self.ratio = ratio - - def forward(self, x): - c = int(x.size(1) * self.ratio) - return x[:, :c, :, :], x[:, c:, :, :] - - -class BasicBlock(nn.Module): - def __init__(self, in_channels, split_ratio=0.5): - super(BasicBlock, self).__init__() - self.split = SplitBlock(split_ratio) - in_channels = int(in_channels * split_ratio) - self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(in_channels) - self.conv2 = nn.Conv2d( - in_channels, - in_channels, - kernel_size=3, - stride=1, - padding=1, - groups=in_channels, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(in_channels) - self.conv3 = nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(in_channels) - self.shuffle = ShuffleBlock() - - def forward(self, x): - x1, x2 = self.split(x) - out = F.relu(self.bn1(self.conv1(x2))) - out = self.bn2(self.conv2(out)) - out = F.relu(self.bn3(self.conv3(out))) - out = torch.cat([x1, out], 1) - out = self.shuffle(out) - return out - - -class DownBlock(nn.Module): - def __init__(self, in_channels, out_channels): - super(DownBlock, self).__init__() - mid_channels = out_channels // 2 - # left - self.conv1 = nn.Conv2d( - in_channels, - in_channels, - kernel_size=3, - stride=2, - padding=1, - groups=in_channels, - bias=False, - ) - self.bn1 = nn.BatchNorm2d(in_channels) - self.conv2 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False) - self.bn2 = nn.BatchNorm2d(mid_channels) - # right - self.conv3 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(mid_channels) - self.conv4 = nn.Conv2d( - mid_channels, - mid_channels, - kernel_size=3, - stride=2, - padding=1, - groups=mid_channels, - bias=False, - ) - self.bn4 = nn.BatchNorm2d(mid_channels) - self.conv5 = nn.Conv2d(mid_channels, mid_channels, kernel_size=1, bias=False) - self.bn5 = nn.BatchNorm2d(mid_channels) - - self.shuffle = ShuffleBlock() - - def forward(self, x): - # left - out1 = self.bn1(self.conv1(x)) - out1 = F.relu(self.bn2(self.conv2(out1))) - # right - out2 = F.relu(self.bn3(self.conv3(x))) - out2 = self.bn4(self.conv4(out2)) 
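-        # conv4 is a depthwise 3x3 (groups=mid_channels); the 1x1 conv5 that follows is its
-        # pointwise counterpart, so the right branch is a depthwise-separable downsample.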
- out2 = F.relu(self.bn5(self.conv5(out2))) - # concat - out = torch.cat([out1, out2], 1) - out = self.shuffle(out) - return out - - -class ShuffleNetV2(nn.Module): - def __init__(self, net_size=0.5): - super(ShuffleNetV2, self).__init__() - out_channels = configs[net_size]["out_channels"] - num_blocks = configs[net_size]["num_blocks"] - - self.conv1 = nn.Conv2d(3, 24, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(24) - self.in_channels = 24 - self.layer1 = self._make_layer(out_channels[0], num_blocks[0]) - self.layer2 = self._make_layer(out_channels[1], num_blocks[1]) - self.layer3 = self._make_layer(out_channels[2], num_blocks[2]) - self.conv2 = nn.Conv2d( - out_channels[2], - out_channels[3], - kernel_size=1, - stride=1, - padding=0, - bias=False, - ) - self.bn2 = nn.BatchNorm2d(out_channels[3]) - self.linear = nn.Linear(out_channels[3], 10) - - def _make_layer(self, out_channels, num_blocks): - layers = [DownBlock(self.in_channels, out_channels)] - for i in range(num_blocks): - layers.append(BasicBlock(out_channels)) - self.in_channels = out_channels - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - # out = F.max_pool2d(out, 3, stride=2, padding=1) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = F.relu(self.bn2(self.conv2(out))) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -configs = { - 0.5: {"out_channels": (48, 96, 192, 1024), "num_blocks": (3, 7, 3)}, - 1: {"out_channels": (116, 232, 464, 1024), "num_blocks": (3, 7, 3)}, - 1.5: {"out_channels": (176, 352, 704, 1024), "num_blocks": (3, 7, 3)}, - 2: {"out_channels": (224, 488, 976, 2048), "num_blocks": (3, 7, 3)}, -} - - -def test_shufflenet_v2(): - load_pytorch_module_and_check( - ShuffleNetV2, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_squeezenet.py b/examples/x2oneflow/pytorch2oneflow/models/test_squeezenet.py deleted file mode 100644 index 1bf3dfad2b898b1893e46bfa458c23060788a5a8..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_squeezenet.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_squeezenet(): - load_pytorch_module_and_check( - torchvision.models.SqueezeNet, - input_size=(1, 3, 224, 224), - train_flag=False, - flow_weight_dir="/tmp/oneflow" - ) - - -def test_squeezenet1_0(): - load_pytorch_module_and_check( - torchvision.models.squeezenet1_0, - input_size=(1, 3, 224, 224), - train_flag=False, - flow_weight_dir="/tmp/oneflow" - ) - - -def test_squeezenet1_1(): - load_pytorch_module_and_check( - torchvision.models.squeezenet1_1, - input_size=(1, 3, 224, 224), - train_flag=False, - flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/models/test_vgg16.py b/examples/x2oneflow/pytorch2oneflow/models/test_vgg16.py deleted file mode 100644 index 0e0286830d4cb6e8e1c01f3ee014e54744024cd9..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/models/test_vgg16.py +++ /dev/null @@ -1,27 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torchvision - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_vgg16(): - load_pytorch_module_and_check( - torchvision.models.vgg16, - input_size=(1, 3, 224, 224), - train_flag=False, - flow_weight_dir="/tmp/oneflow" - ) diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_activations.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_activations.py deleted file mode 100644 index c43a1a7b45bbf4101a9e89e2140be7cab0275f2e..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/nodes/test_activations.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torch -from torch import nn - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_relu(): - class Net(nn.Module): - def forward(self, x): - x = torch.relu(x) - return x - - load_pytorch_module_and_check(Net) diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_array.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_array.py deleted file mode 100644 index 331ce345719c5f78f870949b6f60f86c60e83301..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/nodes/test_array.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torch -from torch import nn - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_concat(): - class Net(nn.Module): - def forward(self, x): - y = x * 3 - return torch.cat((x, y)) - - load_pytorch_module_and_check(Net) - - -def test_concat_with_axis(): - class Net(nn.Module): - def forward(self, x): - y = x * 3 - return torch.cat((x, y), dim=1) - - load_pytorch_module_and_check(Net) - - -def test_unsqueeze(): - class Net(nn.Module): - def forward(self, x): - return torch.unsqueeze(x, 2) - - load_pytorch_module_and_check(Net) - - -def test_transpose(): - class Net(nn.Module): - def forward(self, x): - return torch.transpose(x, 1, 3) - - load_pytorch_module_and_check(Net) - - -def test_gather(): - class Net(nn.Module): - def forward(self, x): - return x[1] - - load_pytorch_module_and_check(Net) - - -def test_tensor_index(): - class Net(nn.Module): - def forward(self, x): - return x[0, 1:3, :1, 2:] - - load_pytorch_module_and_check(Net) diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_clip.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_clip.py deleted file mode 100644 index 188eede75e0c4f88abd8b231821fa04c395e438c..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/nodes/test_clip.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torch -from torch import nn - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_clip_min_max(): - class Net(nn.Module): - def forward(self, x): - x = torch.clamp(x, min=-0.5, max=3.1) - return x - - load_pytorch_module_and_check(Net) - - -def test_clip_min(): - class Net(nn.Module): - def forward(self, x): - x = torch.clamp(x, min=-2.2) - return x - - load_pytorch_module_and_check(Net) - - -def test_clip_max(): - class Net(nn.Module): - def forward(self, x): - x = torch.clamp(x, max=1.2) - return x - - load_pytorch_module_and_check(Net) diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_conv.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_conv.py deleted file mode 100644 index f37437cd8dc29a279da47457d96a136fe9c0fb78..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/nodes/test_conv.py +++ /dev/null @@ -1,86 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torch -from torch import nn - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_conv2d_k3s1p1(): - class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv = nn.Conv2d(4, 5, 3, padding=1) - - def forward(self, x): - x = self.conv(x) - return x - - load_pytorch_module_and_check(Net, input_size=(2, 4, 3, 5)) - - -def test_conv2d_k3s1p0(): - class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv = nn.Conv2d(4, 5, 3, padding=0) - - def forward(self, x): - x = self.conv(x) - return x - - load_pytorch_module_and_check(Net, input_size=(2, 4, 3, 5)) - - -def test_conv2d_k3s2p0(): - class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv = nn.Conv2d(4, 5, 3, stride=2, padding=0) - - def forward(self, x): - x = self.conv(x) - return x - - load_pytorch_module_and_check(Net, input_size=(2, 4, 9, 7)) - - -def test_conv2d_k3s2p0g2(): - class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv = nn.Conv2d(4, 6, 3, stride=1, padding=1, groups=2) - - def forward(self, x): - x = self.conv(x) - return x - - load_pytorch_module_and_check(Net, input_size=(2, 4, 9, 7)) - - -def test_conv2d_k3s2p0g2d2(): - class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv = nn.Conv2d(4, 6, 3, stride=1, padding=1, groups=2, dilation=2) - - def forward(self, x): - x = self.conv(x) - return x - - load_pytorch_module_and_check(Net, input_size=(2, 4, 13, 12)) - - diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_flatten.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_flatten.py deleted file mode 100644 index ea560e1f1a7543c24dd5a471e6d0e375768e9cea..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/nodes/test_flatten.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torch -from torch import nn - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_flatten(): - class Net(nn.Module): - def forward(self, x): - x = torch.flatten(x, 1) - return x - - load_pytorch_module_and_check(Net) diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_math.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_math.py deleted file mode 100644 index 2185c34a47c72dec24d811af1d668371182981e2..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/nodes/test_math.py +++ /dev/null @@ -1,451 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torch -import numpy as np -from torch import nn -import torch.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_add(): - class Net(nn.Module): - def forward(self, x): - x += x - return x - - load_pytorch_module_and_check(Net) - - -def test_sub(): - class Net(nn.Module): - def forward(self, x): - x -= 2 - return x - - load_pytorch_module_and_check(Net) - - -def test_mul(): - class Net(nn.Module): - def forward(self, x): - x *= x - return x - - load_pytorch_module_and_check(Net) - - -def test_div(): - class Net(nn.Module): - def forward(self, x): - x /= 3 - return x - - load_pytorch_module_and_check(Net) - - -def test_sqrt(): - class Net(nn.Module): - def forward(self, x): - x = torch.sqrt(x) - return x - - load_pytorch_module_and_check(Net, input_min_val=0) - - -def test_pow(): - class Net(nn.Module): - def forward(self, x): - x = torch.pow(x, 3) - return x - - load_pytorch_module_and_check(Net) - - -def test_tanh(): - class Net(nn.Module): - def forward(self, x): - x = torch.tanh(x) - return x - - load_pytorch_module_and_check(Net) - - -def test_sigmoid(): - class Net(nn.Module): - def forward(self, x): - x = torch.sigmoid(x) - return x - - load_pytorch_module_and_check(Net) - - -def test_erf(): - class Net(nn.Module): - def forward(self, x): - x = torch.erf(x) - return x - - load_pytorch_module_and_check(Net) - - -def test_clip(): - class Net(nn.Module): - def forward(self, x): - x = torch.clamp(x, -1, 2) - return x - - load_pytorch_module_and_check(Net) - - -# def test_cast(): -# class Net(nn.Module): -# def forward(self, x): -# x = x.int() -# return x - -# load_pytorch_module_and_check(Net) - -def test_abs(): - class Net(nn.Module): - def forward(self, x): - x = torch.abs(x) - return x - - load_pytorch_module_and_check(Net) - -def test_acos(): - class Net(nn.Module): - def forward(self, x): - x = torch.acos(x) - return x - - load_pytorch_module_and_check(Net) - -def test_add_v2(): - class Net(nn.Module): - def forward(self, x): - x = torch.add(x, 0.5) - return x - - load_pytorch_module_and_check(Net) - -def test_addmm(): - class Net(nn.Module): - def forward(self, x): - return torch.addmm(x, x, x) - - load_pytorch_module_and_check(Net, input_size=(2, 2)) - -def test_arange(): - class Net(nn.Module): - def forward(self, x): - return torch.arange(5) - - 
load_pytorch_module_and_check(Net)
-
-def test_argmax():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.argmax(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_argmin():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.argmin(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_asin():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.asin(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_atan():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.atan(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_baddbmm():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.baddbmm(x, x, x)
-
-    load_pytorch_module_and_check(Net, input_size=(2, 2, 2))
-
-def test_ceil():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.ceil(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_cos():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.cos(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_elu():
-    class Net(nn.Module):
-        def forward(self, x):
-            m = nn.ELU()
-            return m(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_eq():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.eq(x, x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_exp():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.exp(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_floor():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.floor(x)
-
-    load_pytorch_module_and_check(Net)
-
-
-def test_floor_divide():
-    class Net(nn.Module):
-        def forward(self, x):
-            a = torch.tensor([4.0, 3.0])
-            b = torch.tensor([2.0, 2.0])
-            return torch.floor_divide(a, b)
-
-    load_pytorch_module_and_check(Net)
-
-def test_full():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.full((2, 3), 1.5)
-
-    load_pytorch_module_and_check(Net)
-
-def test_full_like():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.full_like(x, 1.5)
-
-    load_pytorch_module_and_check(Net)
-
-def test_gelu():
-    class Net(nn.Module):
-        def forward(self, x):
-            m = nn.GELU()
-            return m(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_hardtanh():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.nn.functional.hardtanh(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_leaky_relu():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.nn.functional.leaky_relu(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_log():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.log(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_log1p():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.log1p(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_log2():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.log2(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_log_softmax():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.nn.functional.log_softmax(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_logsumexp():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.logsumexp(x, dim=1)
-
-    load_pytorch_module_and_check(Net)
-
-def test_max():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.max(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_min():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.min(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_mean():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.mean(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_mm():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.mm(x, x)
-
-    load_pytorch_module_and_check(Net, input_size=(2, 2))
-
-def test_neg():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.neg(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_permute():
-    class Net(nn.Module):
-        def forward(self, x):
-            x = x.permute(2, 0, 1)
-            return x
-    load_pytorch_module_and_check(Net, input_size=(2, 3, 5))
-
-def test_prod():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.prod(x)
-
-    load_pytorch_module_and_check(Net, input_size=(3, ))
-
-def test_reshape_as():
-    class Net(nn.Module):
-        def forward(self, x):
-            return x.reshape_as(x)
-    load_pytorch_module_and_check(Net, input_size=(2, 3, 5))
-
-def test_round():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.round(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_rsqrt():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.rsqrt(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_sign():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.sign(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_sin():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.sin(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_softplus():
-    class Net(nn.Module):
-        def forward(self, x):
-            m = nn.Softplus()
-            return m(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_squeeze():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.squeeze(x)
-
-    load_pytorch_module_and_check(Net, input_size=(2, 1, 2, 1))
-
-def test_tan():
-    class Net(nn.Module):
-        def forward(self, x):
-            return torch.tan(x)
-
-    load_pytorch_module_and_check(Net)
-
-def test_prelu():
-    class Net(nn.Module):
-        def forward(self, x):
-            m = nn.PReLU(num_parameters=4)
-            return m(x)
-
-    load_pytorch_module_and_check(Net)
diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_norm.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_norm.py
deleted file mode 100644
index 8493c7f272a296e062683490433bf32e53a9c64f..0000000000000000000000000000000000000000
--- a/examples/x2oneflow/pytorch2oneflow/nodes/test_norm.py
+++ /dev/null
@@ -1,43 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import torch
-from torch import nn
-import torch.nn.functional as F
-
-from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
-
-
-def test_bn():
-    class Net(nn.Module):
-        def __init__(self):
-            super(Net, self).__init__()
-            self.bn = nn.BatchNorm2d(4)
-
-        def forward(self, x):
-            x = self.bn(x)
-            return x
-
-    load_pytorch_module_and_check(Net)
-
-# def test_gn_v1():
-#     class Net(nn.Module):
-#         def forward(self, x):
-#             # Separate 6 channels into 6 groups
-#             m = nn.GroupNorm(6, 6)
-#             output = m(x)
-#             return output
-
-#     load_pytorch_module_and_check(Net, input_size=(20, 6, 10, 10))
diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_pad.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_pad.py
deleted file mode 100644
index c0da1c92734d11a3e4aab2fb5dee6bebc69444a5..0000000000000000000000000000000000000000
--- a/examples/x2oneflow/pytorch2oneflow/nodes/test_pad.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import torch
-from torch import nn
-import torch.nn.functional as F
-
-from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
-
-def test_pad():
-    class Net(nn.Module):
-        def forward(self, x):
-            x = F.pad(x, (2, 3))
-            return x
-
-    load_pytorch_module_and_check(Net)
-
-
-def test_pad_with_value():
-    class Net(nn.Module):
-        def forward(self, x):
-            x = F.pad(x, (2, 3), value=3.5)
-            return x
-
-    load_pytorch_module_and_check(Net)
-
-
-def test_constant_pad2d():
-    class Net(nn.Module):
-        def forward(self, x):
-            m = nn.ConstantPad2d((3, 0, 2, 1), 3.5)
-            x = m(x)
-            return x
-
-    load_pytorch_module_and_check(Net)
diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_pooling.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_pooling.py
deleted file mode 100644
index 1a62f3f4cb702763d18b1b180c6cd22ea8ce8280..0000000000000000000000000000000000000000
--- a/examples/x2oneflow/pytorch2oneflow/nodes/test_pooling.py
+++ /dev/null
@@ -1,124 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import torch
-from torch import nn
-
-from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
-
-
-def _test_k3s1p1(pt_pool):
-    class Net(nn.Module):
-        def __init__(self):
-            super(Net, self).__init__()
-            self.pool = pt_pool(3, stride=1, padding=1)
-
-        def forward(self, x):
-            x = self.pool(x)
-            return x
-
-    load_pytorch_module_and_check(Net, input_size=(2, 4, 3, 5))
-
-
-def test_maxpool_k3s1p1():
-    _test_k3s1p1(nn.MaxPool2d)
-
-
-def test_avgpool_k3s1p1():
-    _test_k3s1p1(nn.AvgPool2d)
-
-
-def _test_k4s2p2(pt_pool):
-    class Net(nn.Module):
-        def __init__(self):
-            super(Net, self).__init__()
-            self.pool = pt_pool(4, stride=2, padding=2)
-
-        def forward(self, x):
-            x = self.pool(x)
-            return x
-
-    load_pytorch_module_and_check(Net, input_size=(2, 4, 10, 9))
-
-
-def test_maxpool_k4s2p2():
-    _test_k4s2p2(nn.MaxPool2d)
-
-
-def test_avgpool_k4s2p2():
-    _test_k4s2p2(nn.AvgPool2d)
-
-
-def _test_k43s2p1(pt_pool):
-    class Net(nn.Module):
-        def __init__(self):
-            super(Net, self).__init__()
-            self.pool = pt_pool((4, 3), stride=2, padding=1)
-
-        def forward(self, x):
-            x = self.pool(x)
-            return x
-
-    load_pytorch_module_and_check(Net, input_size=(2, 4, 10, 9))
-
-
-def test_maxpool_k43s2p1():
-    _test_k43s2p1(nn.MaxPool2d)
-
-
-def test_avgpool_k43s2p1():
-    _test_k43s2p1(nn.AvgPool2d)
-
-
-def _test_k43s2p21(pt_pool):
-    class Net(nn.Module):
-        def __init__(self):
-            super(Net, self).__init__()
-            self.pool = pt_pool((4, 3), stride=2, padding=(2, 1))
-
-        def forward(self, x):
-            x = self.pool(x)
-            return x
-
-    load_pytorch_module_and_check(Net, input_size=(2, 4, 10, 9))
-
-
-def test_maxpool_k43s2p21():
-    _test_k43s2p21(nn.MaxPool2d)
-
-
-def test_avgpool_k43s2p21():
-    _test_k43s2p21(nn.AvgPool2d)
-
-
-def _test_global_pooling(pt_pool):
-    class Net(nn.Module):
-        def __init__(self):
-            super(Net, self).__init__()
-            self.pool = pt_pool((1, 1))
-
-        def forward(self, x):
-            x = self.pool(x)
-            return x
-
-    load_pytorch_module_and_check(Net, input_size=(2, 4, 10, 9))
-
-
-def test_global_avg_pooling():
-    _test_global_pooling(nn.AdaptiveAvgPool2d)
-
-
-def test_global_max_pooling():
-    _test_global_pooling(nn.AdaptiveMaxPool2d)
diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_reduction.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_reduction.py
deleted file mode 100644
index 34c6292c3b266174d612648e742d5a682ce50c30..0000000000000000000000000000000000000000
--- a/examples/x2oneflow/pytorch2oneflow/nodes/test_reduction.py
+++ /dev/null
@@ -1,43 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-""" -import torch -from torch import nn - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_reduce_mean(): - class Net(nn.Module): - def forward(self, x): - return torch.mean(x) - - load_pytorch_module_and_check(Net) - - -def test_reduce_mean_axis(): - class Net(nn.Module): - def forward(self, x): - return torch.mean(x, dim=2) - - load_pytorch_module_and_check(Net) - - -def test_reduce_mean_axis_keepdim(): - class Net(nn.Module): - def forward(self, x): - return torch.mean(x, dim=3, keepdim=True) - - load_pytorch_module_and_check(Net) diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_reshape.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_reshape.py deleted file mode 100644 index 0bd717452fc4e08760eae40beb685fe9d8cfcc82..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/nodes/test_reshape.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torch -from torch import nn - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -# TODO(daquexian): add tests for 0 and -1 after flow.reshape supports it -def test_reshape(): - class Net(nn.Module): - def forward(self, x): - x = torch.reshape(x, (5, 12)) - return x - - load_pytorch_module_and_check(Net, (2, 5, 3, 2)) diff --git a/examples/x2oneflow/pytorch2oneflow/nodes/test_softmax.py b/examples/x2oneflow/pytorch2oneflow/nodes/test_softmax.py deleted file mode 100644 index 45fc20d31777245628c924de178321e11a9b9095..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/pytorch2oneflow/nodes/test_softmax.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import torch -from torch import nn -import torch.nn.functional as F - -from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check - - -def test_softmax(): - class Net(nn.Module): - def forward(self, x): - x = F.softmax(x) - return x - - load_pytorch_module_and_check(Net) - - -def test_softmax_with_axis(): - class Net(nn.Module): - def forward(self, x): - x = F.softmax(x, dim=1) - return x - - load_pytorch_module_and_check(Net) diff --git a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_densenet.py b/examples/x2oneflow/tensorflow2oneflow/code_gen/test_densenet.py deleted file mode 100644 index da5e6787127443401724aa8cb0a525d83c038603..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_densenet.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf -from tensorflow.keras.applications.densenet import DenseNet121 -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_DenseNet121(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.DenseNet121 = DenseNet121(weights=None) - def call(self, x): - x = self.DenseNet121(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True) diff --git a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_effcientnet.py b/examples/x2oneflow/tensorflow2oneflow/code_gen/test_effcientnet.py deleted file mode 100644 index a3ecaa7eeef2b5ce65d849a84ebefa5b11738c9e..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_effcientnet.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf -from tensorflow.keras.applications.efficientnet import EfficientNetB0 -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_EfficientNetB0(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.EfficientNetB0 = EfficientNetB0(weights=None) - def call(self, x): - x = self.EfficientNetB0(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True) diff --git a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_inception_v3.py b/examples/x2oneflow/tensorflow2oneflow/code_gen/test_inception_v3.py deleted file mode 100644 index 8742c03f578833727f647f12f13a79d4f1b076ca..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_inception_v3.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf -from tensorflow.keras.applications.inception_v3 import InceptionV3 -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_InceptionV3(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.InceptionV3 = InceptionV3(weights=None) - def call(self, x): - x = self.InceptionV3(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 299, 299, 3), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True) diff --git a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_mobilenet.py b/examples/x2oneflow/tensorflow2oneflow/code_gen/test_mobilenet.py deleted file mode 100644 index 75e23f157adde4bcdf00f35e3b3530f1548ab7a0..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_mobilenet.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf -from tensorflow.keras.applications.mobilenet import MobileNet -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_MobileNet(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.MobileNet = MobileNet(weights=None) - def call(self, x): - x = self.MobileNet(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True) diff --git a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_mobilenet_v2.py b/examples/x2oneflow/tensorflow2oneflow/code_gen/test_mobilenet_v2.py deleted file mode 100644 index 8befe74e17405ff08596a974fce9fc6c10515b2d..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_mobilenet_v2.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf -from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_MobileNetV2(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.MobileNetV2 = MobileNetV2(weights=None) - def call(self, x): - x = self.MobileNetV2(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True) diff --git a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_mobilenet_v3.py b/examples/x2oneflow/tensorflow2oneflow/code_gen/test_mobilenet_v3.py deleted file mode 100644 index 2a1b35cd60102d7073e973bb21290037409c2d88..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_mobilenet_v3.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf -from tensorflow.python.keras.applications.mobilenet_v3 import MobileNetV3Small -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_MobileNetV3(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.MobileNetV3 = MobileNetV3Small(weights=None) - def call(self, x): - x = self.MobileNetV3(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 299, 299, 3), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True) diff --git a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_resnet.py b/examples/x2oneflow/tensorflow2oneflow/code_gen/test_resnet.py deleted file mode 100644 index 893f8e86c85ae473f94f672e73c61713ef12fb6f..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_resnet.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf -from tensorflow.keras.applications.resnet import ResNet50, ResNet101 - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_resnet50(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.resnet50 = ResNet50(weights=None) - def call(self, x): - x = self.resnet50(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True) diff --git a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_resnetv2.py b/examples/x2oneflow/tensorflow2oneflow/code_gen/test_resnetv2.py deleted file mode 100644 index 180130d8c7c0bf2d06cfeb8855ad5e686748d7b7..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_resnetv2.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf -from tensorflow.keras.applications.resnet_v2 import ResNet50V2 -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_resnet5V2(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.resnet50v2 = ResNet50V2(weights=None) - def call(self, x): - x = self.resnet50v2(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True) diff --git a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_vggnet.py b/examples/x2oneflow/tensorflow2oneflow/code_gen/test_vggnet.py deleted file mode 100644 index 7894d0fed1d099fb543d11790d52d001d610b399..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_vggnet.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf -from tensorflow.keras.applications.vgg16 import VGG16 -from tensorflow.keras.applications.vgg19 import VGG19 - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_vgg16(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.vgg = VGG16(weights=None) - def call(self, x): - x = self.vgg(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True) - diff --git a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_xception.py b/examples/x2oneflow/tensorflow2oneflow/code_gen/test_xception.py deleted file mode 100644 index 4c56cc071010a10510dfe952b40d575920ced952..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/code_gen/test_xception.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf -from tensorflow.keras.applications.xception import Xception -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_Xception(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.Xception = Xception(weights=None) - def call(self, x): - x = self.Xception(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 299, 299, 3), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True) diff --git a/examples/x2oneflow/tensorflow2oneflow/models/test_densenet.py b/examples/x2oneflow/tensorflow2oneflow/models/test_densenet.py deleted file mode 100644 index 1b34393c9db7e8e4e5b0ae5666ed132bfaa47740..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/models/test_densenet.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf -from tensorflow.keras.applications.densenet import DenseNet121 -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_DenseNet121(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.DenseNet121 = DenseNet121(weights=None) - def call(self, x): - x = self.DenseNet121(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") diff --git a/examples/x2oneflow/tensorflow2oneflow/models/test_effcientnet.py b/examples/x2oneflow/tensorflow2oneflow/models/test_effcientnet.py deleted file mode 100644 index 28a8756d5651cded488c4aa732e27b3b39b55f6c..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/models/test_effcientnet.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf -from tensorflow.keras.applications.efficientnet import EfficientNetB0 -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_EfficientNetB0(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.EfficientNetB0 = EfficientNetB0(weights=None) - def call(self, x): - x = self.EfficientNetB0(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") diff --git a/examples/x2oneflow/tensorflow2oneflow/models/test_inception_v3.py b/examples/x2oneflow/tensorflow2oneflow/models/test_inception_v3.py deleted file mode 100644 index e427c0e787b771017db48686a5b3f53c8febff48..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/models/test_inception_v3.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf -from tensorflow.keras.applications.inception_v3 import InceptionV3 -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_InceptionV3(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.InceptionV3 = InceptionV3(weights=None) - def call(self, x): - x = self.InceptionV3(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 299, 299, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") diff --git a/examples/x2oneflow/tensorflow2oneflow/models/test_mobilenet.py b/examples/x2oneflow/tensorflow2oneflow/models/test_mobilenet.py deleted file mode 100644 index 77b410d685192d6ad8bd410e1faf2dc79ca6ddf2..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/models/test_mobilenet.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf -from tensorflow.keras.applications.mobilenet import MobileNet -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_MobileNet(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.MobileNet = MobileNet(weights=None) - def call(self, x): - x = self.MobileNet(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") diff --git a/examples/x2oneflow/tensorflow2oneflow/models/test_mobilenet_v2.py b/examples/x2oneflow/tensorflow2oneflow/models/test_mobilenet_v2.py deleted file mode 100644 index 69a05cc8d9b95124ac38fd58a030d2f4a3bb7470..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/models/test_mobilenet_v2.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf -from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_MobileNetV2(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.MobileNetV2 = MobileNetV2(weights=None) - def call(self, x): - x = self.MobileNetV2(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") diff --git a/examples/x2oneflow/tensorflow2oneflow/models/test_mobilenet_v3.py b/examples/x2oneflow/tensorflow2oneflow/models/test_mobilenet_v3.py deleted file mode 100644 index 46cfca7086ad30f280a33e827ef9dbce3cbbbb61..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/models/test_mobilenet_v3.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf -from tensorflow.python.keras.applications.mobilenet_v3 import MobileNetV3Small -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_MobileNetV3(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.MobileNetV3 = MobileNetV3Small(weights=None) - def call(self, x): - x = self.MobileNetV3(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 299, 299, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") diff --git a/examples/x2oneflow/tensorflow2oneflow/models/test_resnet.py b/examples/x2oneflow/tensorflow2oneflow/models/test_resnet.py deleted file mode 100644 index d7f6168bf0784a3392605961fe8109799e73f2bb..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/models/test_resnet.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf -from tensorflow.keras.applications.resnet import ResNet50, ResNet101 - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_resnet50(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.resnet50 = ResNet50(weights=None) - def call(self, x): - x = self.resnet50(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") - -def test_resnet101(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.resnet101 = ResNet101(weights=None) - def call(self, x): - x = self.resnet101(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") diff --git a/examples/x2oneflow/tensorflow2oneflow/models/test_resnetv2.py b/examples/x2oneflow/tensorflow2oneflow/models/test_resnetv2.py deleted file mode 100644 index feb2382ba13fc0f82f987abf4b1482bb395491b2..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/models/test_resnetv2.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf -from tensorflow.keras.applications.resnet_v2 import ResNet50V2 -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_resnet5V2(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.resnet50v2 = ResNet50V2(weights=None) - def call(self, x): - x = self.resnet50v2(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") diff --git a/examples/x2oneflow/tensorflow2oneflow/models/test_vggnet.py b/examples/x2oneflow/tensorflow2oneflow/models/test_vggnet.py deleted file mode 100644 index f862fa6789f38fd57fe9b742e87f4326b8ea0590..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/models/test_vggnet.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf -from tensorflow.keras.applications.vgg16 import VGG16 -from tensorflow.keras.applications.vgg19 import VGG19 - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_vgg16(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.vgg = VGG16(weights=None) - def call(self, x): - x = self.vgg(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") - -def test_vgg19(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.vgg = VGG19(weights=None) - def call(self, x): - x = self.vgg(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 224, 224, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") diff --git a/examples/x2oneflow/tensorflow2oneflow/models/test_xception.py b/examples/x2oneflow/tensorflow2oneflow/models/test_xception.py deleted file mode 100644 index ec6310c53df1fb0d0f8397c38d4d15ecd3aff00e..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/models/test_xception.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf -from tensorflow.keras.applications.xception import Xception -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_Xception(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.Xception = Xception(weights=None) - def call(self, x): - x = self.Xception(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(1, 299, 299, 3), train_flag=False, flow_weight_dir="/tmp/oneflow") diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_activations.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_activations.py deleted file mode 100644 index c3f47f7133981766e988098dfd179902f542017a..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_activations.py +++ /dev/null @@ -1,54 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_relu(): - class Net(tf.keras.Model): - - def call(self, x, training=False): - x = tf.keras.activations.relu(x) - return x - - load_tensorflow2_module_and_check(Net) - -def test_relu6(): - class Net(tf.keras.Model): - - def call(self, x, training=False): - x = tf.keras.activations.relu(x, max_value=6.0) - return x - - load_tensorflow2_module_and_check(Net) - -def test_swish(): - class Net(tf.keras.Model): - - def call(self, x, training=False): - x = tf.keras.activations.swish(x) - return x - - load_tensorflow2_module_and_check(Net) - -def test_leaky_relu(): - class Net(tf.keras.Model): - - def call(self, x, training=False): - x = tf.nn.leaky_relu(x, alpha=0.2) - return x - - load_tensorflow2_module_and_check(Net) diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_array.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_array.py deleted file mode 100644 index bc33510f4e689d500c56ffbade4e1939c47ae266..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_array.py +++ /dev/null @@ -1,89 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_concat(): - class Net(tf.keras.Model): - def call(self, x): - y = x * 3 - return tf.keras.layers.Concatenate()([x, y]) - - load_tensorflow2_module_and_check(Net) - - -def test_concat_with_axis(): - class Net(tf.keras.Model): - def call(self, x): - y = x * 3 - return tf.keras.layers.Concatenate(axis=1)([x, y]) - - load_tensorflow2_module_and_check(Net) - - -def test_unsqueeze(): - class Net(tf.keras.Model): - def call(self, x): - return tf.expand_dims(x, axis=2) - - load_tensorflow2_module_and_check(Net) - - -def test_transpose(): - class Net(tf.keras.Model): - def call(self, x): - # shape = x.shape - return tf.transpose(x, perm=[0, 3, 1, 2]) - - load_tensorflow2_module_and_check(Net) - - -def test_gather(): - class Net(tf.keras.Model): - def call(self, x): - return x[1] - - load_tensorflow2_module_and_check(Net) - - -def test_tensor_index(): - class Net(tf.keras.Model): - def call(self, x): - return x[0, 1:3, :1, 2:4] - - load_tensorflow2_module_and_check(Net) - -def test_shape(): - class Net(tf.keras.Model): - def call(self, x): - return tf.shape(x) - - load_tensorflow2_module_and_check(Net) - -def test_const(): - class Net(tf.keras.Model): - def call(self, x): - return tf.constant([1, 2, 3, 4, 5]) - - load_tensorflow2_module_and_check(Net) - -def test_identity(): - class Net(tf.keras.Model): - def call(self, x): - return tf.identity(x) - - load_tensorflow2_module_and_check(Net) diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_batch_norm.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_batch_norm.py deleted file mode 100644 index cb58d5debedb1bb4d2076fb802a6177cc5c47330..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_batch_norm.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - - -def test_bn(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.bn = tf.keras.layers.BatchNormalization(axis=1, trainable=False) - - def call(self, x): - x = self.bn(x) - return x - - load_tensorflow2_module_and_check(Net) - -def test_bn_withoutscale(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.bn = tf.keras.layers.BatchNormalization(axis=1, scale=False, trainable=False) - - def call(self, x): - x = self.bn(x) - return x - - load_tensorflow2_module_and_check(Net) diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_clip.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_clip.py deleted file mode 100644 index dfead0f03bc22fa62fe590ed50181e26d511a355..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_clip.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. 
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import tensorflow as tf
-
-from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check
-
-
-def test_clip_min_max():
-    class Net(tf.keras.Model):
-        def call(self, x):
-            x = tf.clip_by_value(x, clip_value_min=-0.5, clip_value_max=3.1)
-            return x
-
-    load_tensorflow2_module_and_check(Net)
-
-
-def test_clip_min():
-    class Net(tf.keras.Model):
-        def call(self, x):
-            x = tf.clip_by_value(x, clip_value_min=-2.2, clip_value_max=float('inf'))
-            return x
-
-    load_tensorflow2_module_and_check(Net)
-
-
-def test_clip_max():
-    class Net(tf.keras.Model):
-        def call(self, x):
-            x = tf.clip_by_value(x, clip_value_max=1.2, clip_value_min=float('-inf'))
-            return x
-
-    load_tensorflow2_module_and_check(Net)
diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_conv.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_conv.py
deleted file mode 100644
index f3c6003c955a1a536419e64ff7419c2fa1e611b1..0000000000000000000000000000000000000000
--- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_conv.py
+++ /dev/null
@@ -1,109 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-""" -import tensorflow as tf - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - - -def test_conv2d_k3s1p1(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.conv = tf.keras.layers.Conv2D(5, 3, padding="same") - - def call(self, x): - x = self.conv(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(2, 4, 3, 5)) - - -def test_conv2d_k3s1p0(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.conv = tf.keras.layers.Conv2D(5, 3, padding="valid") - - def call(self, x): - x = self.conv(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(2, 4, 3, 5)) - - -def test_conv2d_k3s2p0(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.conv = tf.keras.layers.Conv2D(5, 3, strides=(2, 2), padding="valid") - - def call(self, x): - x = self.conv(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(2, 4, 9, 7)) - - -# def test_conv2d_k3s2p0g2(): -# class Net(tf.keras.Model): -# def __init__(self): -# super(Net, self).__init__() -# self.conv = tf.keras.layers.Conv2D(1, 3, strides=(1, 1), padding="valid", groups=6) - -# def call(self, x): -# x = self.conv(x) -# return x - -# load_tensorflow2_module_and_check(Net, input_size=(2, 4, 9, 6)) - - -# def test_conv2d_k3s2p0g2d2(): -# class Net(tf.keras.Model): -# def __init__(self): -# super(Net, self).__init__() -# self.conv = tf.keras.layers.Conv2D(6, 3, strides=(1, 1), padding="valid", groups=2, dilation_rate=2) - -# def call(self, x): -# x = self.conv(x) -# return x - -# load_tensorflow2_module_and_check(Net, input_size=(2, 4, 13, 12)) - - -def test_depthwise_conv2d_k3s2p0(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.conv = tf.keras.layers.DepthwiseConv2D(3, strides=(2, 2), padding="valid") - - def call(self, x): - x = self.conv(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(2, 4, 9, 7)) - -def test_depthwise_conv2d_k3s2p1(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.conv = tf.keras.layers.DepthwiseConv2D(3, strides=(2, 2), padding="same") - - def call(self, x): - x = self.conv(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(2, 4, 9, 7)) - diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_flatten.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_flatten.py deleted file mode 100644 index 3cd77b14ffaef3082327a19439024446f40c57da..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_flatten.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - - -def test_flatten(): - class Net(tf.keras.Model): - def call(self, x): - flatten = tf.keras.layers.Flatten() - x = flatten(x) - return x - - load_tensorflow2_module_and_check(Net) - - diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_math.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_math.py deleted file mode 100644 index 0a89bc6c6415de9bfde7fa29e3d7d9f999b4f17c..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_math.py +++ /dev/null @@ -1,255 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf -import numpy as np -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - - -def test_add(): - class Net(tf.keras.Model): - def call(self, x): - x += x - return x - - load_tensorflow2_module_and_check(Net) - - -def test_sub(): - class Net(tf.keras.Model): - def call(self, x): - x -= 2 - return x - - load_tensorflow2_module_and_check(Net) - - -def test_mul(): - class Net(tf.keras.Model): - def call(self, x): - x *= x - return x - - load_tensorflow2_module_and_check(Net) - - -def test_div(): - class Net(tf.keras.Model): - def call(self, x): - x /= 3 - return x - - load_tensorflow2_module_and_check(Net) - - -def test_sqrt(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.math.sqrt(x) - return x - - load_tensorflow2_module_and_check(Net, input_min_val=0) - - -def test_pow(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.math.pow(x, 3) - return x - - load_tensorflow2_module_and_check(Net) - - -def test_tanh(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.keras.activations.tanh(x) - return x - - load_tensorflow2_module_and_check(Net) - - -def test_sigmoid(): - class Net(tf.keras.Model): - def call(self, x): - m = tf.keras.activations.sigmoid(x) - return x - - load_tensorflow2_module_and_check(Net) - - -def test_erf(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.math.erf(x) - return x - - load_tensorflow2_module_and_check(Net) - -# def test_cast(): -# class Net(tf.keras.Model): -# def call(self, x): -# x = tf.cast(x, tf.int32) -# return x - -# load_tensorflow2_module_and_check(Net) - -def test_abs(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.math.abs(x) - return x - - load_tensorflow2_module_and_check(Net) - -def test_exp(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.math.exp(x) - return x - - load_tensorflow2_module_and_check(Net) - -def test_rsqrt(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.math.rsqrt(x) - return x - - load_tensorflow2_module_and_check(Net) - -def test_maximum(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.math.maximum(x, x*2) - return x - - load_tensorflow2_module_and_check(Net) - -def test_minimum(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.math.minimum(x, x*2) - return x - - 
load_tensorflow2_module_and_check(Net) - -def test_floordiv(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.math.floordiv(x*1.5, x) - return x - - load_tensorflow2_module_and_check(Net) - - -def test_squared_difference(): - class Net(tf.keras.Model): - def call(self, x): - return tf.math.squared_difference(x, x) - - load_tensorflow2_module_and_check(Net) - -def test_argmax(): - class Net(tf.keras.Model): - def call(self, x): - return tf.math.argmax(x, axis=1) - - load_tensorflow2_module_and_check(Net) - -def test_slice(): - class Net(tf.keras.Model): - def call(self, x): - return tf.slice(x, [1, 0, 0, 0], [1, 1, 2, 4]) - - load_tensorflow2_module_and_check(Net) - -def test_squeeze(): - class Net(tf.keras.Model): - def call(self, x): - return tf.squeeze(x) - - load_tensorflow2_module_and_check(Net, input_size=(1, 2, 1, 2)) - -def test_range(): - class Net(tf.keras.Model): - def call(self, x): - return tf.range(start=3, limit=18, delta=3) - - load_tensorflow2_module_and_check(Net) - - -def test_fill(): - class Net(tf.keras.Model): - def call(self, x): - return tf.fill([2, 3], 9) - - load_tensorflow2_module_and_check(Net) - -def test_floor(): - class Net(tf.keras.Model): - def call(self, x): - return tf.math.floor(x) - - load_tensorflow2_module_and_check(Net) - -def test_softplus(): - class Net(tf.keras.Model): - def call(self, x): - return tf.math.softplus(x) - - load_tensorflow2_module_and_check(Net) - -def test_greater(): - class Net(tf.keras.Model): - def call(self, x): - return tf.greater(2*x, x) - - load_tensorflow2_module_and_check(Net) - -def test_negative(): - class Net(tf.keras.Model): - def call(self, x): - return tf.negative(x) - - load_tensorflow2_module_and_check(Net) - -def test_ceil(): - class Net(tf.keras.Model): - def call(self, x): - return tf.math.ceil(x) - - load_tensorflow2_module_and_check(Net) - -def test_where(): - class Net(tf.keras.Model): - def call(self, x): - return tf.where([True, False, False, True], [1,2,3,4], [100,200,300,400]) - - load_tensorflow2_module_and_check(Net) - -def test_size(): - class Net(tf.keras.Model): - def call(self, x): - return tf.size(x) - - load_tensorflow2_module_and_check(Net) - -def test_equal(): - class Net(tf.keras.Model): - def call(self, x): - return tf.math.equal(x, x) - - load_tensorflow2_module_and_check(Net) diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_pad.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_pad.py deleted file mode 100644 index 81be6574197bedd1fd3db77538357e5a5a76ecf1..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_pad.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_pad(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.pad(x, paddings=tf.constant([[0, 0], [2, 2], [3, 3], [0, 0]]), mode="CONSTANT") - return x - - load_tensorflow2_module_and_check(Net) - - -def test_pad_with_value(): - class Net(tf.keras.Model): - def call(self, x): - x = tf.pad(x, paddings=tf.constant([[0, 0], [2, 2], [3, 3], [0, 0]]), mode="CONSTANT", constant_values=3.5) - return x - - load_tensorflow2_module_and_check(Net) - diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_pooling.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_pooling.py deleted file mode 100644 index 6b5ffad32ed189d0757801df26882f0a03b8b26c..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_pooling.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - - -def _test_k3s1p1(pt_pool): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.pool = pt_pool(pool_size=(3,3), strides=(1,1), padding="same") - - def call(self, x): - x = self.pool(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(2, 4, 3, 5)) - - -def test_maxpool_k3s1p1(): - _test_k3s1p1(tf.keras.layers.MaxPool2D) - - -def test_avgpool_k3s1p1(): - _test_k3s1p1(tf.keras.layers.AveragePooling2D) - - -def _test_k4s2p2(pt_pool): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.pool = pt_pool(pool_size=(4,4), strides=(2,2), padding="same") - - def call(self, x): - x = self.pool(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(2, 4, 10, 9)) - - -def test_maxpool_k4s2p2(): - _test_k4s2p2(tf.keras.layers.MaxPool2D) - - -def test_avgpool_k4s2p3(): - _test_k4s2p2(tf.keras.layers.AveragePooling2D) - - -def _test_k43s2p1(pt_pool): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.pool = pt_pool(pool_size=(4, 3), strides=(2,2), padding="same") - - def call(self, x): - x = self.pool(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(2, 4, 10, 9)) - - -def test_maxpool_k43s2p1(): - _test_k43s2p1(tf.keras.layers.MaxPool2D) - - -def test_avgpool_k43s2p1(): - _test_k43s2p1(tf.keras.layers.AveragePooling2D) - - -def _test_k43s2p21(pt_pool): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.pool = pt_pool(pool_size=(4, 3), strides=(2,2), padding="same") - - def call(self, x): - x = self.pool(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(2, 4, 10, 9)) - - -def test_maxpool_k43s2p21(): - _test_k43s2p21(tf.keras.layers.MaxPool2D) - - -def test_avgpool_k43s2p21(): - _test_k43s2p21(tf.keras.layers.AveragePooling2D) - - -def _test_global_pooling(pt_pool): - class Net(tf.keras.Model): - def 
__init__(self): - super(Net, self).__init__() - self.pool = pt_pool() - - def call(self, x): - x = self.pool(x) - return x - - load_tensorflow2_module_and_check(Net, input_size=(2, 4, 10, 9)) - - -def test_global_avg_pooling(): - _test_global_pooling(tf.keras.layers.GlobalAveragePooling2D) - - -def test_global_max_pooling(): - _test_global_pooling(tf.keras.layers.GlobalMaxPool2D) - diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_reduction.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_reduction.py deleted file mode 100644 index b4bd239965201fe018f6e158332f40806a46c5d0..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_reduction.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - -def test_reduce_mean(): - class Net(tf.keras.Model): - def call(self, x): - return tf.math.reduce_mean(x) - - load_tensorflow2_module_and_check(Net) - - -def test_reduce_mean_axis(): - class Net(tf.keras.Model): - def call(self, x): - return tf.math.reduce_mean(x, axis=1) - - load_tensorflow2_module_and_check(Net) - - -def test_reduce_mean_axis_keepdim(): - class Net(tf.keras.Model): - def call(self, x): - return tf.math.reduce_mean(x, axis=3, keepdims=True) - - load_tensorflow2_module_and_check(Net) diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_reshape.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_reshape.py deleted file mode 100644 index 2f7f79768bd2b097eed87472bf1ce387780aa793..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_reshape.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import tensorflow as tf - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - - -# TODO(daquexian): add tests for 0 and -1 after flow.reshape supports it -def test_reshape(): - class Net(tf.keras.Model): - def __init__(self): - super(Net, self).__init__() - self.reshape = tf.keras.layers.Reshape(target_shape=(3, 10)) - def call(self, x): - x = self.reshape(x) - return x - - load_tensorflow2_module_and_check(Net, (2, 5, 3, 2)) - diff --git a/examples/x2oneflow/tensorflow2oneflow/nodes/test_softmax.py b/examples/x2oneflow/tensorflow2oneflow/nodes/test_softmax.py deleted file mode 100644 index 884743e45ecc601515377f16d9c9d626b7ed8c8d..0000000000000000000000000000000000000000 --- a/examples/x2oneflow/tensorflow2oneflow/nodes/test_softmax.py +++ /dev/null @@ -1,39 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import tensorflow as tf - -from oneflow_onnx.x2oneflow.util import load_tensorflow2_module_and_check - - -def test_softmax(): - class Net(tf.keras.Model): - def call(self, x): - m = tf.keras.layers.Softmax() - x = m(x) - return x - - load_tensorflow2_module_and_check(Net) - - -def test_softmax_with_axis(): - class Net(tf.keras.Model): - def call(self, x): - m = tf.keras.layers.Softmax(axis=1) - x = m(x) - return x - - load_tensorflow2_module_and_check(Net) - diff --git a/nchw2nhwc_tool/README.md b/nchw2nhwc_tool/README.md deleted file mode 100644 index e5510b8d1460d5a6a59032c33853170ed4ffea36..0000000000000000000000000000000000000000 --- a/nchw2nhwc_tool/README.md +++ /dev/null @@ -1,27 +0,0 @@ -### nchw2nhwc_tool - -#### 依赖 -- oneflow - -#### 执行指令 - -- 模型转换 - -``` -python3 nchw2nhwc.py --input_model_dir="./resnet50" --output_model_dir="./resnet50_nhwc" -``` - -- 模型推理 - -``` -对于NCHW:python3 inference.py --log_dir="./log" --model_load_dir="./resnet50" --image_path="./fish.jpg" --channel_last=False - -对于NHWC: python3 inference.py --log_dir="./log" --model_load_dir="./resnet50_nhwc" --image_path="./fish.jpg" --channel_last=True -``` - -## TODO - -- [x] 完成NCHW->NHWC模型转换脚本 -- [x] 基于ResNet50用转换后的权重跑通网络,验证正确性 -- [x] 转换StyleNet地权重,并验证正确性。 -- [ ] 合并PR \ No newline at end of file diff --git a/nchw2nhwc_tool/imagenet1000_clsidx_to_labels.py b/nchw2nhwc_tool/imagenet1000_clsidx_to_labels.py deleted file mode 100644 index 0feaeb1b8e37824a6b05af3f68f91ca928b32ea0..0000000000000000000000000000000000000000 --- a/nchw2nhwc_tool/imagenet1000_clsidx_to_labels.py +++ /dev/null @@ -1,1014 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -""" -clsidx_2_labels = { - 0: 'tench, Tinca tinca', - 1: 'goldfish, Carassius auratus', - 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias', - 3: 'tiger shark, Galeocerdo cuvieri', - 4: 'hammerhead, hammerhead shark', - 5: 'electric ray, crampfish, numbfish, torpedo', - 6: 'stingray', - 7: 'cock', - 8: 'hen', - 9: 'ostrich, Struthio camelus', - 10: 'brambling, Fringilla montifringilla', - 11: 'goldfinch, Carduelis carduelis', - 12: 'house finch, linnet, Carpodacus mexicanus', - 13: 'junco, snowbird', - 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea', - 15: 'robin, American robin, Turdus migratorius', - 16: 'bulbul', - 17: 'jay', - 18: 'magpie', - 19: 'chickadee', - 20: 'water ouzel, dipper', - 21: 'kite', - 22: 'bald eagle, American eagle, Haliaeetus leucocephalus', - 23: 'vulture', - 24: 'great grey owl, great gray owl, Strix nebulosa', - 25: 'European fire salamander, Salamandra salamandra', - 26: 'common newt, Triturus vulgaris', - 27: 'eft', - 28: 'spotted salamander, Ambystoma maculatum', - 29: 'axolotl, mud puppy, Ambystoma mexicanum', - 30: 'bullfrog, Rana catesbeiana', - 31: 'tree frog, tree-frog', - 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui', - 33: 'loggerhead, loggerhead turtle, Caretta caretta', - 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea', - 35: 'mud turtle', - 36: 'terrapin', - 37: 'box turtle, box tortoise', - 38: 'banded gecko', - 39: 'common iguana, iguana, Iguana iguana', - 40: 'American chameleon, anole, Anolis carolinensis', - 41: 'whiptail, whiptail lizard', - 42: 'agama', - 43: 'frilled lizard, Chlamydosaurus kingi', - 44: 'alligator lizard', - 45: 'Gila monster, Heloderma suspectum', - 46: 'green lizard, Lacerta viridis', - 47: 'African chameleon, Chamaeleo chamaeleon', - 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis', - 49: 'African crocodile, Nile crocodile, Crocodylus niloticus', - 50: 'American alligator, Alligator mississipiensis', - 51: 'triceratops', - 52: 'thunder snake, worm snake, Carphophis amoenus', - 53: 'ringneck snake, ring-necked snake, ring snake', - 54: 'hognose snake, puff adder, sand viper', - 55: 'green snake, grass snake', - 56: 'king snake, kingsnake', - 57: 'garter snake, grass snake', - 58: 'water snake', - 59: 'vine snake', - 60: 'night snake, Hypsiglena torquata', - 61: 'boa constrictor, Constrictor constrictor', - 62: 'rock python, rock snake, Python sebae', - 63: 'Indian cobra, Naja naja', - 64: 'green mamba', - 65: 'sea snake', - 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus', - 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus', - 68: 'sidewinder, horned rattlesnake, Crotalus cerastes', - 69: 'trilobite', - 70: 'harvestman, daddy longlegs, Phalangium opilio', - 71: 'scorpion', - 72: 'black and gold garden spider, Argiope aurantia', - 73: 'barn spider, Araneus cavaticus', - 74: 'garden spider, Aranea diademata', - 75: 'black widow, Latrodectus mactans', - 76: 'tarantula', - 77: 'wolf spider, hunting spider', - 78: 'tick', - 79: 'centipede', - 80: 'black grouse', - 81: 'ptarmigan', - 82: 'ruffed grouse, partridge, Bonasa umbellus', - 83: 'prairie chicken, prairie grouse, prairie fowl', - 84: 'peacock', - 85: 'quail', - 86: 'partridge', - 87: 'African grey, African gray, Psittacus erithacus', - 88: 'macaw', - 89: 'sulphur-crested cockatoo, Kakatoe 
galerita, Cacatua galerita', - 90: 'lorikeet', - 91: 'coucal', - 92: 'bee eater', - 93: 'hornbill', - 94: 'hummingbird', - 95: 'jacamar', - 96: 'toucan', - 97: 'drake', - 98: 'red-breasted merganser, Mergus serrator', - 99: 'goose', - 100: 'black swan, Cygnus atratus', - 101: 'tusker', - 102: 'echidna, spiny anteater, anteater', - 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus', - 104: 'wallaby, brush kangaroo', - 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus', - 106: 'wombat', - 107: 'jellyfish', - 108: 'sea anemone, anemone', - 109: 'brain coral', - 110: 'flatworm, platyhelminth', - 111: 'nematode, nematode worm, roundworm', - 112: 'conch', - 113: 'snail', - 114: 'slug', - 115: 'sea slug, nudibranch', - 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore', - 117: 'chambered nautilus, pearly nautilus, nautilus', - 118: 'Dungeness crab, Cancer magister', - 119: 'rock crab, Cancer irroratus', - 120: 'fiddler crab', - 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica', - 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus', - 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish', - 124: 'crayfish, crawfish, crawdad, crawdaddy', - 125: 'hermit crab', - 126: 'isopod', - 127: 'white stork, Ciconia ciconia', - 128: 'black stork, Ciconia nigra', - 129: 'spoonbill', - 130: 'flamingo', - 131: 'little blue heron, Egretta caerulea', - 132: 'American egret, great white heron, Egretta albus', - 133: 'bittern', - 134: 'crane', - 135: 'limpkin, Aramus pictus', - 136: 'European gallinule, Porphyrio porphyrio', - 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana', - 138: 'bustard', - 139: 'ruddy turnstone, Arenaria interpres', - 140: 'red-backed sandpiper, dunlin, Erolia alpina', - 141: 'redshank, Tringa totanus', - 142: 'dowitcher', - 143: 'oystercatcher, oyster catcher', - 144: 'pelican', - 145: 'king penguin, Aptenodytes patagonica', - 146: 'albatross, mollymawk', - 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus', - 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca', - 149: 'dugong, Dugong dugon', - 150: 'sea lion', - 151: 'Chihuahua', - 152: 'Japanese spaniel', - 153: 'Maltese dog, Maltese terrier, Maltese', - 154: 'Pekinese, Pekingese, Peke', - 155: 'Shih-Tzu', - 156: 'Blenheim spaniel', - 157: 'papillon', - 158: 'toy terrier', - 159: 'Rhodesian ridgeback', - 160: 'Afghan hound, Afghan', - 161: 'basset, basset hound', - 162: 'beagle', - 163: 'bloodhound, sleuthhound', - 164: 'bluetick', - 165: 'black-and-tan coonhound', - 166: 'Walker hound, Walker foxhound', - 167: 'English foxhound', - 168: 'redbone', - 169: 'borzoi, Russian wolfhound', - 170: 'Irish wolfhound', - 171: 'Italian greyhound', - 172: 'whippet', - 173: 'Ibizan hound, Ibizan Podenco', - 174: 'Norwegian elkhound, elkhound', - 175: 'otterhound, otter hound', - 176: 'Saluki, gazelle hound', - 177: 'Scottish deerhound, deerhound', - 178: 'Weimaraner', - 179: 'Staffordshire bullterrier, Staffordshire bull terrier', - 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier', - 181: 'Bedlington terrier', - 182: 'Border terrier', - 183: 'Kerry blue terrier', - 184: 'Irish terrier', - 185: 'Norfolk terrier', - 186: 'Norwich terrier', - 187: 'Yorkshire terrier', - 188: 'wire-haired fox terrier', - 189: 'Lakeland terrier', - 190: 'Sealyham terrier, 
Sealyham', - 191: 'Airedale, Airedale terrier', - 192: 'cairn, cairn terrier', - 193: 'Australian terrier', - 194: 'Dandie Dinmont, Dandie Dinmont terrier', - 195: 'Boston bull, Boston terrier', - 196: 'miniature schnauzer', - 197: 'giant schnauzer', - 198: 'standard schnauzer', - 199: 'Scotch terrier, Scottish terrier, Scottie', - 200: 'Tibetan terrier, chrysanthemum dog', - 201: 'silky terrier, Sydney silky', - 202: 'soft-coated wheaten terrier', - 203: 'West Highland white terrier', - 204: 'Lhasa, Lhasa apso', - 205: 'flat-coated retriever', - 206: 'curly-coated retriever', - 207: 'golden retriever', - 208: 'Labrador retriever', - 209: 'Chesapeake Bay retriever', - 210: 'German short-haired pointer', - 211: 'vizsla, Hungarian pointer', - 212: 'English setter', - 213: 'Irish setter, red setter', - 214: 'Gordon setter', - 215: 'Brittany spaniel', - 216: 'clumber, clumber spaniel', - 217: 'English springer, English springer spaniel', - 218: 'Welsh springer spaniel', - 219: 'cocker spaniel, English cocker spaniel, cocker', - 220: 'Sussex spaniel', - 221: 'Irish water spaniel', - 222: 'kuvasz', - 223: 'schipperke', - 224: 'groenendael', - 225: 'malinois', - 226: 'briard', - 227: 'kelpie', - 228: 'komondor', - 229: 'Old English sheepdog, bobtail', - 230: 'Shetland sheepdog, Shetland sheep dog, Shetland', - 231: 'collie', - 232: 'Border collie', - 233: 'Bouvier des Flandres, Bouviers des Flandres', - 234: 'Rottweiler', - 235: 'German shepherd, German shepherd dog, German police dog, alsatian', - 236: 'Doberman, Doberman pinscher', - 237: 'miniature pinscher', - 238: 'Greater Swiss Mountain dog', - 239: 'Bernese mountain dog', - 240: 'Appenzeller', - 241: 'EntleBucher', - 242: 'boxer', - 243: 'bull mastiff', - 244: 'Tibetan mastiff', - 245: 'French bulldog', - 246: 'Great Dane', - 247: 'Saint Bernard, St Bernard', - 248: 'Eskimo dog, husky', - 249: 'malamute, malemute, Alaskan malamute', - 250: 'Siberian husky', - 251: 'dalmatian, coach dog, carriage dog', - 252: 'affenpinscher, monkey pinscher, monkey dog', - 253: 'basenji', - 254: 'pug, pug-dog', - 255: 'Leonberg', - 256: 'Newfoundland, Newfoundland dog', - 257: 'Great Pyrenees', - 258: 'Samoyed, Samoyede', - 259: 'Pomeranian', - 260: 'chow, chow chow', - 261: 'keeshond', - 262: 'Brabancon griffon', - 263: 'Pembroke, Pembroke Welsh corgi', - 264: 'Cardigan, Cardigan Welsh corgi', - 265: 'toy poodle', - 266: 'miniature poodle', - 267: 'standard poodle', - 268: 'Mexican hairless', - 269: 'timber wolf, grey wolf, gray wolf, Canis lupus', - 270: 'white wolf, Arctic wolf, Canis lupus tundrarum', - 271: 'red wolf, maned wolf, Canis rufus, Canis niger', - 272: 'coyote, prairie wolf, brush wolf, Canis latrans', - 273: 'dingo, warrigal, warragal, Canis dingo', - 274: 'dhole, Cuon alpinus', - 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus', - 276: 'hyena, hyaena', - 277: 'red fox, Vulpes vulpes', - 278: 'kit fox, Vulpes macrotis', - 279: 'Arctic fox, white fox, Alopex lagopus', - 280: 'grey fox, gray fox, Urocyon cinereoargenteus', - 281: 'tabby, tabby cat', - 282: 'tiger cat', - 283: 'Persian cat', - 284: 'Siamese cat, Siamese', - 285: 'Egyptian cat', - 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor', - 287: 'lynx, catamount', - 288: 'leopard, Panthera pardus', - 289: 'snow leopard, ounce, Panthera uncia', - 290: 'jaguar, panther, Panthera onca, Felis onca', - 291: 'lion, king of beasts, Panthera leo', - 292: 'tiger, Panthera tigris', - 293: 'cheetah, chetah, Acinonyx jubatus', - 294: 'brown bear, 
bruin, Ursus arctos', - 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus', - 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus', - 297: 'sloth bear, Melursus ursinus, Ursus ursinus', - 298: 'mongoose', - 299: 'meerkat, mierkat', - 300: 'tiger beetle', - 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle', - 302: 'ground beetle, carabid beetle', - 303: 'long-horned beetle, longicorn, longicorn beetle', - 304: 'leaf beetle, chrysomelid', - 305: 'dung beetle', - 306: 'rhinoceros beetle', - 307: 'weevil', - 308: 'fly', - 309: 'bee', - 310: 'ant, emmet, pismire', - 311: 'grasshopper, hopper', - 312: 'cricket', - 313: 'walking stick, walkingstick, stick insect', - 314: 'cockroach, roach', - 315: 'mantis, mantid', - 316: 'cicada, cicala', - 317: 'leafhopper', - 318: 'lacewing, lacewing fly', - 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", - 320: 'damselfly', - 321: 'admiral', - 322: 'ringlet, ringlet butterfly', - 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus', - 324: 'cabbage butterfly', - 325: 'sulphur butterfly, sulfur butterfly', - 326: 'lycaenid, lycaenid butterfly', - 327: 'starfish, sea star', - 328: 'sea urchin', - 329: 'sea cucumber, holothurian', - 330: 'wood rabbit, cottontail, cottontail rabbit', - 331: 'hare', - 332: 'Angora, Angora rabbit', - 333: 'hamster', - 334: 'porcupine, hedgehog', - 335: 'fox squirrel, eastern fox squirrel, Sciurus niger', - 336: 'marmot', - 337: 'beaver', - 338: 'guinea pig, Cavia cobaya', - 339: 'sorrel', - 340: 'zebra', - 341: 'hog, pig, grunter, squealer, Sus scrofa', - 342: 'wild boar, boar, Sus scrofa', - 343: 'warthog', - 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius', - 345: 'ox', - 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis', - 347: 'bison', - 348: 'ram, tup', - 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis', - 350: 'ibex, Capra ibex', - 351: 'hartebeest', - 352: 'impala, Aepyceros melampus', - 353: 'gazelle', - 354: 'Arabian camel, dromedary, Camelus dromedarius', - 355: 'llama', - 356: 'weasel', - 357: 'mink', - 358: 'polecat, fitch, foulmart, foumart, Mustela putorius', - 359: 'black-footed ferret, ferret, Mustela nigripes', - 360: 'otter', - 361: 'skunk, polecat, wood pussy', - 362: 'badger', - 363: 'armadillo', - 364: 'three-toed sloth, ai, Bradypus tridactylus', - 365: 'orangutan, orang, orangutang, Pongo pygmaeus', - 366: 'gorilla, Gorilla gorilla', - 367: 'chimpanzee, chimp, Pan troglodytes', - 368: 'gibbon, Hylobates lar', - 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus', - 370: 'guenon, guenon monkey', - 371: 'patas, hussar monkey, Erythrocebus patas', - 372: 'baboon', - 373: 'macaque', - 374: 'langur', - 375: 'colobus, colobus monkey', - 376: 'proboscis monkey, Nasalis larvatus', - 377: 'marmoset', - 378: 'capuchin, ringtail, Cebus capucinus', - 379: 'howler monkey, howler', - 380: 'titi, titi monkey', - 381: 'spider monkey, Ateles geoffroyi', - 382: 'squirrel monkey, Saimiri sciureus', - 383: 'Madagascar cat, ring-tailed lemur, Lemur catta', - 384: 'indri, indris, Indri indri, Indri brevicaudatus', - 385: 'Indian elephant, Elephas maximus', - 386: 'African elephant, Loxodonta africana', - 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens', - 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca', - 389: 'barracouta, snoek', 
- 390: 'eel', - 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch', - 392: 'rock beauty, Holocanthus tricolor', - 393: 'anemone fish', - 394: 'sturgeon', - 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus', - 396: 'lionfish', - 397: 'puffer, pufferfish, blowfish, globefish', - 398: 'abacus', - 399: 'abaya', - 400: "academic gown, academic robe, judge's robe", - 401: 'accordion, piano accordion, squeeze box', - 402: 'acoustic guitar', - 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier', - 404: 'airliner', - 405: 'airship, dirigible', - 406: 'altar', - 407: 'ambulance', - 408: 'amphibian, amphibious vehicle', - 409: 'analog clock', - 410: 'apiary, bee house', - 411: 'apron', - 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin', - 413: 'assault rifle, assault gun', - 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack', - 415: 'bakery, bakeshop, bakehouse', - 416: 'balance beam, beam', - 417: 'balloon', - 418: 'ballpoint, ballpoint pen, ballpen, Biro', - 419: 'Band Aid', - 420: 'banjo', - 421: 'bannister, banister, balustrade, balusters, handrail', - 422: 'barbell', - 423: 'barber chair', - 424: 'barbershop', - 425: 'barn', - 426: 'barometer', - 427: 'barrel, cask', - 428: 'barrow, garden cart, lawn cart, wheelbarrow', - 429: 'baseball', - 430: 'basketball', - 431: 'bassinet', - 432: 'bassoon', - 433: 'bathing cap, swimming cap', - 434: 'bath towel', - 435: 'bathtub, bathing tub, bath, tub', - 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon', - 437: 'beacon, lighthouse, beacon light, pharos', - 438: 'beaker', - 439: 'bearskin, busby, shako', - 440: 'beer bottle', - 441: 'beer glass', - 442: 'bell cote, bell cot', - 443: 'bib', - 444: 'bicycle-built-for-two, tandem bicycle, tandem', - 445: 'bikini, two-piece', - 446: 'binder, ring-binder', - 447: 'binoculars, field glasses, opera glasses', - 448: 'birdhouse', - 449: 'boathouse', - 450: 'bobsled, bobsleigh, bob', - 451: 'bolo tie, bolo, bola tie, bola', - 452: 'bonnet, poke bonnet', - 453: 'bookcase', - 454: 'bookshop, bookstore, bookstall', - 455: 'bottlecap', - 456: 'bow', - 457: 'bow tie, bow-tie, bowtie', - 458: 'brass, memorial tablet, plaque', - 459: 'brassiere, bra, bandeau', - 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty', - 461: 'breastplate, aegis, egis', - 462: 'broom', - 463: 'bucket, pail', - 464: 'buckle', - 465: 'bulletproof vest', - 466: 'bullet train, bullet', - 467: 'butcher shop, meat market', - 468: 'cab, hack, taxi, taxicab', - 469: 'caldron, cauldron', - 470: 'candle, taper, wax light', - 471: 'cannon', - 472: 'canoe', - 473: 'can opener, tin opener', - 474: 'cardigan', - 475: 'car mirror', - 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig', - 477: "carpenter's kit, tool kit", - 478: 'carton', - 479: 'car wheel', - 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM', - 481: 'cassette', - 482: 'cassette player', - 483: 'castle', - 484: 'catamaran', - 485: 'CD player', - 486: 'cello, violoncello', - 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone', - 488: 'chain', - 489: 'chainlink fence', - 490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour', - 491: 'chain saw, chainsaw', - 492: 'chest', - 493: 'chiffonier, commode', - 494: 'chime, bell, gong', - 495: 'china cabinet, china closet', - 496: 
'Christmas stocking', - 497: 'church, church building', - 498: 'cinema, movie theater, movie theatre, movie house, picture palace', - 499: 'cleaver, meat cleaver, chopper', - 500: 'cliff dwelling', - 501: 'cloak', - 502: 'clog, geta, patten, sabot', - 503: 'cocktail shaker', - 504: 'coffee mug', - 505: 'coffeepot', - 506: 'coil, spiral, volute, whorl, helix', - 507: 'combination lock', - 508: 'computer keyboard, keypad', - 509: 'confectionery, confectionary, candy store', - 510: 'container ship, containership, container vessel', - 511: 'convertible', - 512: 'corkscrew, bottle screw', - 513: 'cornet, horn, trumpet, trump', - 514: 'cowboy boot', - 515: 'cowboy hat, ten-gallon hat', - 516: 'cradle', - 517: 'crane', - 518: 'crash helmet', - 519: 'crate', - 520: 'crib, cot', - 521: 'Crock Pot', - 522: 'croquet ball', - 523: 'crutch', - 524: 'cuirass', - 525: 'dam, dike, dyke', - 526: 'desk', - 527: 'desktop computer', - 528: 'dial telephone, dial phone', - 529: 'diaper, nappy, napkin', - 530: 'digital clock', - 531: 'digital watch', - 532: 'dining table, board', - 533: 'dishrag, dishcloth', - 534: 'dishwasher, dish washer, dishwashing machine', - 535: 'disk brake, disc brake', - 536: 'dock, dockage, docking facility', - 537: 'dogsled, dog sled, dog sleigh', - 538: 'dome', - 539: 'doormat, welcome mat', - 540: 'drilling platform, offshore rig', - 541: 'drum, membranophone, tympan', - 542: 'drumstick', - 543: 'dumbbell', - 544: 'Dutch oven', - 545: 'electric fan, blower', - 546: 'electric guitar', - 547: 'electric locomotive', - 548: 'entertainment center', - 549: 'envelope', - 550: 'espresso maker', - 551: 'face powder', - 552: 'feather boa, boa', - 553: 'file, file cabinet, filing cabinet', - 554: 'fireboat', - 555: 'fire engine, fire truck', - 556: 'fire screen, fireguard', - 557: 'flagpole, flagstaff', - 558: 'flute, transverse flute', - 559: 'folding chair', - 560: 'football helmet', - 561: 'forklift', - 562: 'fountain', - 563: 'fountain pen', - 564: 'four-poster', - 565: 'freight car', - 566: 'French horn, horn', - 567: 'frying pan, frypan, skillet', - 568: 'fur coat', - 569: 'garbage truck, dustcart', - 570: 'gasmask, respirator, gas helmet', - 571: 'gas pump, gasoline pump, petrol pump, island dispenser', - 572: 'goblet', - 573: 'go-kart', - 574: 'golf ball', - 575: 'golfcart, golf cart', - 576: 'gondola', - 577: 'gong, tam-tam', - 578: 'gown', - 579: 'grand piano, grand', - 580: 'greenhouse, nursery, glasshouse', - 581: 'grille, radiator grille', - 582: 'grocery store, grocery, food market, market', - 583: 'guillotine', - 584: 'hair slide', - 585: 'hair spray', - 586: 'half track', - 587: 'hammer', - 588: 'hamper', - 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier', - 590: 'hand-held computer, hand-held microcomputer', - 591: 'handkerchief, hankie, hanky, hankey', - 592: 'hard disc, hard disk, fixed disk', - 593: 'harmonica, mouth organ, harp, mouth harp', - 594: 'harp', - 595: 'harvester, reaper', - 596: 'hatchet', - 597: 'holster', - 598: 'home theater, home theatre', - 599: 'honeycomb', - 600: 'hook, claw', - 601: 'hoopskirt, crinoline', - 602: 'horizontal bar, high bar', - 603: 'horse cart, horse-cart', - 604: 'hourglass', - 605: 'iPod', - 606: 'iron, smoothing iron', - 607: "jack-o'-lantern", - 608: 'jean, blue jean, denim', - 609: 'jeep, landrover', - 610: 'jersey, T-shirt, tee shirt', - 611: 'jigsaw puzzle', - 612: 'jinrikisha, ricksha, rickshaw', - 613: 'joystick', - 614: 'kimono', - 615: 'knee pad', - 616: 'knot', - 617: 'lab coat, laboratory coat', - 618: 
'ladle', - 619: 'lampshade, lamp shade', - 620: 'laptop, laptop computer', - 621: 'lawn mower, mower', - 622: 'lens cap, lens cover', - 623: 'letter opener, paper knife, paperknife', - 624: 'library', - 625: 'lifeboat', - 626: 'lighter, light, igniter, ignitor', - 627: 'limousine, limo', - 628: 'liner, ocean liner', - 629: 'lipstick, lip rouge', - 630: 'Loafer', - 631: 'lotion', - 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system', - 633: "loupe, jeweler's loupe", - 634: 'lumbermill, sawmill', - 635: 'magnetic compass', - 636: 'mailbag, postbag', - 637: 'mailbox, letter box', - 638: 'maillot', - 639: 'maillot, tank suit', - 640: 'manhole cover', - 641: 'maraca', - 642: 'marimba, xylophone', - 643: 'mask', - 644: 'matchstick', - 645: 'maypole', - 646: 'maze, labyrinth', - 647: 'measuring cup', - 648: 'medicine chest, medicine cabinet', - 649: 'megalith, megalithic structure', - 650: 'microphone, mike', - 651: 'microwave, microwave oven', - 652: 'military uniform', - 653: 'milk can', - 654: 'minibus', - 655: 'miniskirt, mini', - 656: 'minivan', - 657: 'missile', - 658: 'mitten', - 659: 'mixing bowl', - 660: 'mobile home, manufactured home', - 661: 'Model T', - 662: 'modem', - 663: 'monastery', - 664: 'monitor', - 665: 'moped', - 666: 'mortar', - 667: 'mortarboard', - 668: 'mosque', - 669: 'mosquito net', - 670: 'motor scooter, scooter', - 671: 'mountain bike, all-terrain bike, off-roader', - 672: 'mountain tent', - 673: 'mouse, computer mouse', - 674: 'mousetrap', - 675: 'moving van', - 676: 'muzzle', - 677: 'nail', - 678: 'neck brace', - 679: 'necklace', - 680: 'nipple', - 681: 'notebook, notebook computer', - 682: 'obelisk', - 683: 'oboe, hautboy, hautbois', - 684: 'ocarina, sweet potato', - 685: 'odometer, hodometer, mileometer, milometer', - 686: 'oil filter', - 687: 'organ, pipe organ', - 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO', - 689: 'overskirt', - 690: 'oxcart', - 691: 'oxygen mask', - 692: 'packet', - 693: 'paddle, boat paddle', - 694: 'paddlewheel, paddle wheel', - 695: 'padlock', - 696: 'paintbrush', - 697: "pajama, pyjama, pj's, jammies", - 698: 'palace', - 699: 'panpipe, pandean pipe, syrinx', - 700: 'paper towel', - 701: 'parachute, chute', - 702: 'parallel bars, bars', - 703: 'park bench', - 704: 'parking meter', - 705: 'passenger car, coach, carriage', - 706: 'patio, terrace', - 707: 'pay-phone, pay-station', - 708: 'pedestal, plinth, footstall', - 709: 'pencil box, pencil case', - 710: 'pencil sharpener', - 711: 'perfume, essence', - 712: 'Petri dish', - 713: 'photocopier', - 714: 'pick, plectrum, plectron', - 715: 'pickelhaube', - 716: 'picket fence, paling', - 717: 'pickup, pickup truck', - 718: 'pier', - 719: 'piggy bank, penny bank', - 720: 'pill bottle', - 721: 'pillow', - 722: 'ping-pong ball', - 723: 'pinwheel', - 724: 'pirate, pirate ship', - 725: 'pitcher, ewer', - 726: "plane, carpenter's plane, woodworking plane", - 727: 'planetarium', - 728: 'plastic bag', - 729: 'plate rack', - 730: 'plow, plough', - 731: "plunger, plumber's helper", - 732: 'Polaroid camera, Polaroid Land camera', - 733: 'pole', - 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria', - 735: 'poncho', - 736: 'pool table, billiard table, snooker table', - 737: 'pop bottle, soda bottle', - 738: 'pot, flowerpot', - 739: "potter's wheel", - 740: 'power drill', - 741: 'prayer rug, prayer mat', - 742: 'printer', - 743: 'prison, prison house', - 744: 'projectile, missile', - 745: 'projector', - 746: 'puck, hockey puck', - 747: 
'punching bag, punch bag, punching ball, punchball', - 748: 'purse', - 749: 'quill, quill pen', - 750: 'quilt, comforter, comfort, puff', - 751: 'racer, race car, racing car', - 752: 'racket, racquet', - 753: 'radiator', - 754: 'radio, wireless', - 755: 'radio telescope, radio reflector', - 756: 'rain barrel', - 757: 'recreational vehicle, RV, R.V.', - 758: 'reel', - 759: 'reflex camera', - 760: 'refrigerator, icebox', - 761: 'remote control, remote', - 762: 'restaurant, eating house, eating place, eatery', - 763: 'revolver, six-gun, six-shooter', - 764: 'rifle', - 765: 'rocking chair, rocker', - 766: 'rotisserie', - 767: 'rubber eraser, rubber, pencil eraser', - 768: 'rugby ball', - 769: 'rule, ruler', - 770: 'running shoe', - 771: 'safe', - 772: 'safety pin', - 773: 'saltshaker, salt shaker', - 774: 'sandal', - 775: 'sarong', - 776: 'sax, saxophone', - 777: 'scabbard', - 778: 'scale, weighing machine', - 779: 'school bus', - 780: 'schooner', - 781: 'scoreboard', - 782: 'screen, CRT screen', - 783: 'screw', - 784: 'screwdriver', - 785: 'seat belt, seatbelt', - 786: 'sewing machine', - 787: 'shield, buckler', - 788: 'shoe shop, shoe-shop, shoe store', - 789: 'shoji', - 790: 'shopping basket', - 791: 'shopping cart', - 792: 'shovel', - 793: 'shower cap', - 794: 'shower curtain', - 795: 'ski', - 796: 'ski mask', - 797: 'sleeping bag', - 798: 'slide rule, slipstick', - 799: 'sliding door', - 800: 'slot, one-armed bandit', - 801: 'snorkel', - 802: 'snowmobile', - 803: 'snowplow, snowplough', - 804: 'soap dispenser', - 805: 'soccer ball', - 806: 'sock', - 807: 'solar dish, solar collector, solar furnace', - 808: 'sombrero', - 809: 'soup bowl', - 810: 'space bar', - 811: 'space heater', - 812: 'space shuttle', - 813: 'spatula', - 814: 'speedboat', - 815: "spider web, spider's web", - 816: 'spindle', - 817: 'sports car, sport car', - 818: 'spotlight, spot', - 819: 'stage', - 820: 'steam locomotive', - 821: 'steel arch bridge', - 822: 'steel drum', - 823: 'stethoscope', - 824: 'stole', - 825: 'stone wall', - 826: 'stopwatch, stop watch', - 827: 'stove', - 828: 'strainer', - 829: 'streetcar, tram, tramcar, trolley, trolley car', - 830: 'stretcher', - 831: 'studio couch, day bed', - 832: 'stupa, tope', - 833: 'submarine, pigboat, sub, U-boat', - 834: 'suit, suit of clothes', - 835: 'sundial', - 836: 'sunglass', - 837: 'sunglasses, dark glasses, shades', - 838: 'sunscreen, sunblock, sun blocker', - 839: 'suspension bridge', - 840: 'swab, swob, mop', - 841: 'sweatshirt', - 842: 'swimming trunks, bathing trunks', - 843: 'swing', - 844: 'switch, electric switch, electrical switch', - 845: 'syringe', - 846: 'table lamp', - 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle', - 848: 'tape player', - 849: 'teapot', - 850: 'teddy, teddy bear', - 851: 'television, television system', - 852: 'tennis ball', - 853: 'thatch, thatched roof', - 854: 'theater curtain, theatre curtain', - 855: 'thimble', - 856: 'thresher, thrasher, threshing machine', - 857: 'throne', - 858: 'tile roof', - 859: 'toaster', - 860: 'tobacco shop, tobacconist shop, tobacconist', - 861: 'toilet seat', - 862: 'torch', - 863: 'totem pole', - 864: 'tow truck, tow car, wrecker', - 865: 'toyshop', - 866: 'tractor', - 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi', - 868: 'tray', - 869: 'trench coat', - 870: 'tricycle, trike, velocipede', - 871: 'trimaran', - 872: 'tripod', - 873: 'triumphal arch', - 874: 'trolleybus, trolley coach, trackless trolley', - 875: 'trombone', - 876: 'tub, vat', 
- 877: 'turnstile', - 878: 'typewriter keyboard', - 879: 'umbrella', - 880: 'unicycle, monocycle', - 881: 'upright, upright piano', - 882: 'vacuum, vacuum cleaner', - 883: 'vase', - 884: 'vault', - 885: 'velvet', - 886: 'vending machine', - 887: 'vestment', - 888: 'viaduct', - 889: 'violin, fiddle', - 890: 'volleyball', - 891: 'waffle iron', - 892: 'wall clock', - 893: 'wallet, billfold, notecase, pocketbook', - 894: 'wardrobe, closet, press', - 895: 'warplane, military plane', - 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin', - 897: 'washer, automatic washer, washing machine', - 898: 'water bottle', - 899: 'water jug', - 900: 'water tower', - 901: 'whiskey jug', - 902: 'whistle', - 903: 'wig', - 904: 'window screen', - 905: 'window shade', - 906: 'Windsor tie', - 907: 'wine bottle', - 908: 'wing', - 909: 'wok', - 910: 'wooden spoon', - 911: 'wool, woolen, woollen', - 912: 'worm fence, snake fence, snake-rail fence, Virginia fence', - 913: 'wreck', - 914: 'yawl', - 915: 'yurt', - 916: 'web site, website, internet site, site', - 917: 'comic book', - 918: 'crossword puzzle, crossword', - 919: 'street sign', - 920: 'traffic light, traffic signal, stoplight', - 921: 'book jacket, dust cover, dust jacket, dust wrapper', - 922: 'menu', - 923: 'plate', - 924: 'guacamole', - 925: 'consomme', - 926: 'hot pot, hotpot', - 927: 'trifle', - 928: 'ice cream, icecream', - 929: 'ice lolly, lolly, lollipop, popsicle', - 930: 'French loaf', - 931: 'bagel, beigel', - 932: 'pretzel', - 933: 'cheeseburger', - 934: 'hotdog, hot dog, red hot', - 935: 'mashed potato', - 936: 'head cabbage', - 937: 'broccoli', - 938: 'cauliflower', - 939: 'zucchini, courgette', - 940: 'spaghetti squash', - 941: 'acorn squash', - 942: 'butternut squash', - 943: 'cucumber, cuke', - 944: 'artichoke, globe artichoke', - 945: 'bell pepper', - 946: 'cardoon', - 947: 'mushroom', - 948: 'Granny Smith', - 949: 'strawberry', - 950: 'orange', - 951: 'lemon', - 952: 'fig', - 953: 'pineapple, ananas', - 954: 'banana', - 955: 'jackfruit, jak, jack', - 956: 'custard apple', - 957: 'pomegranate', - 958: 'hay', - 959: 'carbonara', - 960: 'chocolate sauce, chocolate syrup', - 961: 'dough', - 962: 'meat loaf, meatloaf', - 963: 'pizza, pizza pie', - 964: 'potpie', - 965: 'burrito', - 966: 'red wine', - 967: 'espresso', - 968: 'cup', - 969: 'eggnog', - 970: 'alp', - 971: 'bubble', - 972: 'cliff, drop, drop-off', - 973: 'coral reef', - 974: 'geyser', - 975: 'lakeside, lakeshore', - 976: 'promontory, headland, head, foreland', - 977: 'sandbar, sand bar', - 978: 'seashore, coast, seacoast, sea-coast', - 979: 'valley, vale', - 980: 'volcano', - 981: 'ballplayer, baseball player', - 982: 'groom, bridegroom', - 983: 'scuba diver', - 984: 'rapeseed', - 985: 'daisy', - 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", - 987: 'corn', - 988: 'acorn', - 989: 'hip, rose hip, rosehip', - 990: 'buckeye, horse chestnut, conker', - 991: 'coral fungus', - 992: 'agaric', - 993: 'gyromitra', - 994: 'stinkhorn, carrion fungus', - 995: 'earthstar', - 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa', - 997: 'bolete', - 998: 'ear, spike, capitulum', - 999: 'toilet tissue, toilet paper, bathroom tissue' -} \ No newline at end of file diff --git a/nchw2nhwc_tool/inference.py b/nchw2nhwc_tool/inference.py deleted file mode 100644 index b5a7a1cdfed8205dd4a4b06931cb3e7f592d0836..0000000000000000000000000000000000000000 --- a/nchw2nhwc_tool/inference.py +++ /dev/null @@ -1,299 +0,0 @@ 
-import os -import numpy as np -from PIL import Image -from imagenet1000_clsidx_to_labels import clsidx_2_labels -import argparse - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Unsupported value encountered.') - -parser = argparse.ArgumentParser() - -parser.add_argument("--log_dir", type=str, - default="./output", help="log info save directory") -parser.add_argument("--model_load_dir", type=str, - default=None, help="model load directory if needed") -parser.add_argument("--image_path", type=str, default='test_img/tiger.jpg', help="image path") -parser.add_argument( - '--channel_last', - type=str2bool, - nargs='?', - const=False, - help='Whether to use channel-last (NHWC) mode' - ) -# fuse bn relu or bn add relu -parser.add_argument( - '--fuse_bn_relu', - type=str2bool, - default=False, - help='Whether to use fused batch normalization + ReLU. Currently supported in origin/master of OneFlow only.' -) -parser.add_argument( - '--fuse_bn_add_relu', - type=str2bool, - default=False, - help='Whether to use fused batch normalization + add + ReLU. Currently supported in origin/master of OneFlow only.' -) -parser.add_argument( - '--pad_output', - type=str2bool, - nargs='?', - const=True, - help='Whether to pad the number of image channels to 4.' - ) - -args = parser.parse_args() - -import oneflow as flow -import oneflow.typing as tp - -#---------------------------------------------------# -# ResNet50 network -#---------------------------------------------------# -BLOCK_COUNTS = [3, 4, 6, 3] -BLOCK_FILTERS = [256, 512, 1024, 2048] -BLOCK_FILTERS_INNER = [64, 128, 256, 512] - - -class ResnetBuilder(object): - def __init__(self, weight_regularizer, trainable=True, training=True, channel_last=False, fuse_bn_relu=True, fuse_bn_add_relu=True): - self.data_format = "NHWC" if channel_last else "NCHW" - self.weight_initializer = flow.variance_scaling_initializer(2, 'fan_in', 'random_normal', - data_format=self.data_format) - self.weight_regularizer = weight_regularizer - self.trainable = trainable - self.training = training - self.fuse_bn_relu = fuse_bn_relu - self.fuse_bn_add_relu = fuse_bn_add_relu - - def _conv2d( - self, - name, - input, - filters, - kernel_size, - strides=1, - padding="SAME", - dilations=1, - ): - # The weight tensor shape differs between 'NCHW' and 'NHWC' modes - if self.data_format == "NHWC": - shape = (filters, kernel_size, kernel_size, input.shape[3]) - else: - shape = (filters, input.shape[1], kernel_size, kernel_size) - weight = flow.get_variable( - name + "-weight", - shape=shape, - dtype=input.dtype, - initializer=self.weight_initializer, - regularizer=self.weight_regularizer, - model_name="weight", - trainable=self.trainable, - ) - - return flow.nn.conv2d(input, weight, strides, padding, self.data_format, dilations, name=name) - - def _batch_norm(self, inputs, name=None, last=False): - initializer = flow.zeros_initializer() if last else flow.ones_initializer() - axis = 1 - if self.data_format =="NHWC": - axis = 3 - return flow.layers.batch_normalization( - inputs=inputs, - axis=axis, - momentum=0.9, - epsilon=1e-5, - center=True, - scale=True, - trainable=self.trainable, - training=self.training, - gamma_initializer=initializer, - moving_variance_initializer=initializer, - gamma_regularizer=self.weight_regularizer, - beta_regularizer=self.weight_regularizer, - name=name, - ) - - def _batch_norm_relu(self,
inputs, name=None, last=False): - if self.fuse_bn_relu: - initializer = flow.zeros_initializer() if last else flow.ones_initializer() - axis = 1 - if self.data_format =="NHWC": - axis = 3 - return flow.layers.batch_normalization_relu( - inputs=inputs, - axis=axis, - momentum=0.9, - epsilon=1e-5, - center=True, - scale=True, - trainable=self.trainable, - training=self.training, - gamma_initializer=initializer, - moving_variance_initializer=initializer, - gamma_regularizer=self.weight_regularizer, - beta_regularizer=self.weight_regularizer, - name=name + "_bn_relu", - ) - else: - return flow.nn.relu(self._batch_norm(inputs, name + "_bn", last=last)) - - def _batch_norm_add_relu(self, inputs, addend, name=None, last=False): - if self.fuse_bn_add_relu: - initializer = flow.zeros_initializer() if last else flow.ones_initializer() - axis = 1 - if self.data_format =="NHWC": - axis = 3 - return flow.layers.batch_normalization_add_relu( - inputs=inputs, - addend=addend, - axis=axis, - momentum=0.9, - epsilon=1e-5, - center=True, - scale=True, - trainable=self.trainable, - training=self.training, - gamma_initializer=initializer, - moving_variance_initializer=initializer, - gamma_regularizer=self.weight_regularizer, - beta_regularizer=self.weight_regularizer, - name=name+"_bn_add_relu", - ) - else: - return flow.nn.relu(self._batch_norm(inputs, name+"_bn", last=last) + addend) - - def conv2d_affine(self, input, name, filters, kernel_size, strides): - # input data_format must be NCHW, cannot check now - padding = "SAME" if strides > 1 or kernel_size > 1 else "VALID" - output = self._conv2d(name, input, filters, kernel_size, strides, padding) - return output - - def bottleneck_transformation(self, input, block_name, filters, filters_inner, strides): - a = self.conv2d_affine( - input, block_name + "_branch2a", filters_inner, 1, 1) - a = self._batch_norm_relu(a, block_name + "_branch2a") - - b = self.conv2d_affine( - a, block_name + "_branch2b", filters_inner, 3, strides) - b = self._batch_norm_relu(b, block_name + "_branch2b") - - c = self.conv2d_affine(b, block_name + "_branch2c", filters, 1, 1) - return c - - def residual_block(self, input, block_name, filters, filters_inner, strides_init): - if strides_init != 1 or block_name == "res2_0": - shortcut = self.conv2d_affine( - input, block_name + "_branch1", filters, 1, strides_init - ) - shortcut = self._batch_norm(shortcut, block_name + "_branch1_bn") - else: - shortcut = input - - bottleneck = self.bottleneck_transformation( - input, block_name, filters, filters_inner, strides_init, - ) - return self._batch_norm_add_relu(bottleneck, shortcut, block_name + "_branch2c", last=True) - - def residual_stage(self, input, stage_name, counts, filters, filters_inner, stride_init=2): - output = input - for i in range(counts): - block_name = "%s_%d" % (stage_name, i) - output = self.residual_block( - output, block_name, filters, filters_inner, stride_init if i == 0 else 1 - ) - - return output - - def resnet_conv_x_body(self, input): - output = input - for i, (counts, filters, filters_inner) in enumerate( - zip(BLOCK_COUNTS, BLOCK_FILTERS, BLOCK_FILTERS_INNER) - ): - stage_name = "res%d" % (i + 2) - output = self.residual_stage( - output, stage_name, counts, filters, filters_inner, 1 if i == 0 else 2 - ) - return output - - def resnet_stem(self, input): - conv1 = self._conv2d("conv1", input, 64, 7, 2) - conv1_bn = self._batch_norm_relu(conv1, "conv1") - pool1 = flow.nn.max_pool2d( - conv1_bn, ksize=3, strides=2, padding="SAME", data_format=self.data_format, 
name="pool1", - ) - return pool1 - - -def resnet50(images, args, trainable=True, training=True): - weight_regularizer = None - builder = ResnetBuilder(weight_regularizer, trainable, training, args.channel_last, args.fuse_bn_relu, args.fuse_bn_add_relu) - if args.pad_output: - if args.channel_last: - paddings = ((0, 0), (0, 0), (0, 0), (0, 1)) - else: - paddings = ((0, 0), (0, 1), (0, 0), (0, 0)) - images = flow.pad(images, paddings=paddings) - with flow.scope.namespace("Resnet"): - stem = builder.resnet_stem(images) - body = builder.resnet_conv_x_body(stem) - pool5 = flow.nn.avg_pool2d( - body, ksize=7, strides=1, padding="VALID", data_format=builder.data_format, name="pool5", - ) - fc1001 = flow.layers.dense( - flow.reshape(pool5, (pool5.shape[0], -1)), - units=1000, - use_bias=True, - kernel_initializer=flow.variance_scaling_initializer(2, 'fan_in', 'random_normal'), - bias_initializer=flow.zeros_initializer(), - kernel_regularizer=weight_regularizer, - bias_regularizer=weight_regularizer, - trainable=trainable, - name="fc1001", - ) - return fc1001 - - -#---------------------------------------------------# -# 推理部分 -#---------------------------------------------------# - -def load_image(image_path='test_img/ILSVRC2012_val_00020287.JPEG'): - im = Image.open(image_path) - im = im.resize((224, 224)) - im = im.convert('RGB') # 有的图像是单通道的,不加转换会报错 - im = np.array(im).astype('float32') - im = (im - [123.68, 116.779, 103.939]) / [58.393, 57.12, 57.375] - im = np.transpose(im, (2, 0, 1)) - im = np.expand_dims(im, axis=0) - if args.channel_last: - im = np.transpose(im, (0, 2, 3, 1)) - return np.ascontiguousarray(im, 'float32') - - -@flow.global_function("predict", flow.function_config()) -def InferenceNet(images: tp.Numpy.Placeholder((1, 224, 224, 3), dtype=flow.float)) -> tp.Numpy: - logits = resnet50(images, args, training=False) - predictions = flow.nn.softmax(logits) - return predictions - - -def main(): - flow.env.log_dir(args.log_dir) - assert os.path.isdir(args.model_load_dir) - flow.load_variables(flow.checkpoint.get(args.model_load_dir)) - - image = load_image(args.image_path) - predictions = InferenceNet(image) - clsidx = predictions.argmax() - print(predictions.max(), clsidx_2_labels[clsidx]) - # flow.checkpoint.save("./resnet50") - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/nchw2nhwc_tool/nchw2nhwc.py b/nchw2nhwc_tool/nchw2nhwc.py deleted file mode 100644 index d45e34dccc5ce79603ab2504e3d6f00ded2688db..0000000000000000000000000000000000000000 --- a/nchw2nhwc_tool/nchw2nhwc.py +++ /dev/null @@ -1,87 +0,0 @@ -#coding=utf-8 - -import os -import argparse -import numpy as np -import oneflow as flow -import shutil -import struct - -parser = argparse.ArgumentParser() - -parser.add_argument("--input_model_dir", type=str, - default="./resnet50", help="input model directory") - -parser.add_argument("--output_model_dir", type=str, - default="./resnet50_nhwc", help="output model directory") - -args = parser.parse_args() - -input_model_dir = args.input_model_dir -output_model_dir = args.output_model_dir - -files = os.listdir(input_model_dir) - -if os.path.exists(output_model_dir): - del_list = os.listdir(output_model_dir) - for f in del_list: - file_path = os.path.join(output_model_dir, f) - if os.path.isfile(file_path): - os.remove(file_path) - elif os.path.isdir(file_path): - shutil.rmtree(file_path) - os.rmdir(output_model_dir) - -for file in files: - m = os.path.join(input_model_dir, file) - new_m = os.path.join(output_model_dir, file) - - if 
(os.path.isdir(m)): - if not os.path.exists(new_m): - os.makedirs(new_m) - - weight_file = os.path.join(m, "out") - meta_file = os.path.join(m, "meta") - - new_weight_file = os.path.join(new_m, "out") - new_meta_file = os.path.join(new_m, "meta") - - import subprocess - tensor_shape = subprocess.check_output(("grep dim {} | sed 's/dim: //g' | xargs").format(meta_file), shell=True) - - dims = list(map(int, str(tensor_shape, encoding = "utf8").strip().split())) - - if len(dims) == 4: - weight = [] - # [n, c, h, w] -> [n, h, w, c] - # [0, 1, 2, 3] -> [0, 2, 3, 1] - binfile = open(weight_file, 'rb') - size = os.path.getsize(weight_file) - for i in range(size // 4): - data = binfile.read(4) - weight.append(struct.unpack('f', data)) - - weight = np.array(weight, dtype=np.float32) - weight = weight.reshape(dims) - weight = np.transpose(weight, (0, 2, 3, 1)) - - os.mknod(new_weight_file) - - f = open(new_weight_file, 'wb') - f.write(np.ascontiguousarray(weight)) - f.close() - else: - os.mknod(new_weight_file) - shutil.copy(weight_file, new_weight_file) - - elif (file == "snapshot_done"): - if not os.path.exists(new_m): - os.mknod(new_m) - shutil.copy(m, new_m) - else: - pass - - - - - diff --git a/oneflow_onnx/x2oneflow/__init__.py b/oneflow_onnx/x2oneflow/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/oneflow_onnx/x2oneflow/handler.py b/oneflow_onnx/x2oneflow/handler.py deleted file mode 100644 index 8136311669061add2e55b444d34fdde51ccda0c9..0000000000000000000000000000000000000000 --- a/oneflow_onnx/x2oneflow/handler.py +++ /dev/null @@ -1,306 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import copy -import inspect -import os -import shutil - -from onnx import defs -import oneflow as flow -import oneflow - -class BackendHandler: - """ - Every operator handler MUST use the @onnx_op decorator to register its corresponding op. - """ - - ONNX_OP = None - - DOMAIN = defs.ONNX_DOMAIN - VERSION = 0 - SINCE_VERSION = 0 - PARTIAL_SUPPORT = False - PS_DESCRIPTION = "" - ONEFLOW_BLOBNAME_MAP = {} - ONEFLOW_CODE_GEN = [] - OP_OUTPUS = [] - - @classmethod - def check_cls(cls): - if not cls.ONNX_OP: - common.logger.warning( - "{} doesn't have ONNX_OP. " - "Please use BackendHandler.onnx_op decorator to register ONNX_OP.".format( - cls.__name__ - ) - ) - - @classmethod - def handle(cls, node, tensor_dict, **kwargs): - """ Main method in handler. It will find the corresponding versioned handle method, - whose name format is `version_%d`. The prefix `version_` is therefore reserved in onnx-oneflow. - DON'T use it for other purposes. - - :param node: NodeProto for backend. - :param kwargs: Other args. - :return: OneFlowNode for backend.
- """ - ver_handle = getattr(cls, "version_{}".format(cls.SINCE_VERSION), None) - if ver_handle: - return ver_handle(node, tensor_dict, **kwargs) - raise ValueError( - 'node "{}" of version {} is not supported'.format( - node.op_type, cls.SINCE_VERSION - ) - ) - return None - - @classmethod - def get_versions(cls): - """ Get all support versions. - - :return: Version list. - """ - versions = [] - for k, v in inspect.getmembers(cls, inspect.ismethod): - if k.startswith("version_"): - versions.append(int(k.replace("version_", ""))) - return versions - - @staticmethod - def onnx_op(op): - return BackendHandler.property_register("ONNX_OP", op) - - @staticmethod - def flow_func(func): - return BackendHandler.property_register("FLOW_FUNC", func) - - @staticmethod - def domain(d): - return BackendHandler.property_register("DOMAIN", d) - - @staticmethod - def partial_support(ps): - return BackendHandler.property_register("PARTIAL_SUPPORT", ps) - - @staticmethod - def ps_description(psd): - return BackendHandler.property_register("PS_DESCRIPTION", psd) - - @staticmethod - def property_register(name, value): - def deco(cls): - setattr(cls, name, value) - return cls - - return deco - - FLOW_FUNC = None - WEIGHT_SAVE_DIR = None - - @classmethod - def copy_variable_file(cls, src_var_name, dst_var_name): - dst_dir_name = os.path.join(cls.WEIGHT_SAVE_DIR, dst_var_name) - if not os.path.exists(dst_dir_name): - os.makedirs(dst_dir_name) - shutil.copyfile( - os.path.join(cls.WEIGHT_SAVE_DIR, src_var_name, "out"), - os.path.join(dst_dir_name, "out"), - ) - - @classmethod - def get_attrs_processor_param(cls): - """ Get param for attrs processor. - - :return: Dict. - """ - return {} - - @classmethod - def _process_attrs(cls, attrs): - """ Private method for processing attrs. - Param for this processor got from `get_attrs_processor_param`. - Param is dict contains two key: `default` and `raname`. - First add default value to attrs if key does not exist. - Second rename key to new key. - - For example: - attrs = {"keep_dims": True} - param = {"default": {"axis": 1}, - "rename": {"keep_dims": "keepdims"}} - - processed_attrs = {"axis": "1", "keepdims": True} - - :param attrs: Process target attrs. - :return: Processed attrs. - """ - param = {"rename": {}, "default": {}} - param.update(cls.get_attrs_processor_param()) - - for k, v in param["default"].items(): - attrs.setdefault(k, v) - - for k, new_k in param["rename"].items(): - if k in attrs: - attrs[new_k] = attrs.pop(k) - - return attrs - - @classmethod - def run_onnx_node( - cls, - node, - tensor_dict, - flow_func=None, - inputs=None, - attrs=None, - name="", - **kwargs - ): - """ Helper method to make tensor. - - :param node: OnnxNode object. - :param flow_func: Callable OneFlow function. Default is cls.FLOW_FUNC. - :param inputs: Inputs tensor. Default is got from node.inputs. - :param attrs: Attributes. Default is node.attrs. - :param name: Node name. - :param kwargs: Other args. - :return: Tensor. 
- """ - if flow_func is None: - flow_func = cls.FLOW_FUNC - if inputs is None: - inputs = [tensor_dict.get(inp, None) for inp in node.input_tensor_names] - if attrs is None: - attrs = copy.deepcopy(node.attrs) - if name != "": - attrs["name"] = name - for inp in node.input_tensor_names: - if tensor_dict[inp] not in cls.ONEFLOW_BLOBNAME_MAP: - cls.ONEFLOW_BLOBNAME_MAP[tensor_dict[inp]] = inp - cls.OP_OUTPUS = [] - for oup in node.output_tensor_names: - cls.OP_OUTPUS.append(oup) - y = cls._run_flow_func(flow_func, inputs, attrs) - if type(y) == list(): - for x in cls.OP_OUTPUS: - if y[x] not in cls.ONEFLOW_BLOBNAME_MAP: - cls.ONEFLOW_BLOBNAME_MAP[y[x]] = x - else: - if y not in cls.ONEFLOW_BLOBNAME_MAP: - cls.ONEFLOW_BLOBNAME_MAP[y] = cls.OP_OUTPUS[0] - return y - - @classmethod - def _run_flow_func(cls, flow_func, inputs, attrs): - """ Run Oneflow function. - Use only acceptable attributes of function from attrs. - - :param flow_func: OneFlow function. - :param inputs: Inputs. - :param attrs: Attributes. - :return: Tensor. - """ - params = list(inspect.signature(flow_func).parameters.keys()) - - attrs = cls._process_attrs(attrs) - attrs = {p: v for p, v in attrs.items() if p in params} - kwargs = dict(zip(params, inputs)) - ambiguous_arguments = any( - kwargs.get(p) is not None and v is not None for p, v in attrs.items() - ) - if ambiguous_arguments: - raise TypeError("Ambiguous arguments for {}()".format(flow_func.__name__)) - kwargs.update((p, v) for p, v in attrs.items() if v is not None) - pre_name = '' - if len(cls.OP_OUTPUS) == 1: - pre_name = cls.OP_OUTPUS[0] + ' = ' - else: - for i in range(len(cls.OP_OUTPUS) - 1): - pre_name = pre_name + '{}, '.format(cls.OP_OUTPUS[i]) - pre_name = pre_name + '{} = '.format(cls.OP_OUTPUS[len(cls.OP_OUTPUS) - 1]) - if str(flow_func).split()[1] != 'api_get_variable' and (pre_name + cls.code_gen(flow_func, kwargs)) not in cls.ONEFLOW_CODE_GEN: - cls.ONEFLOW_CODE_GEN.append(pre_name + cls.code_gen(flow_func, kwargs)) - return flow_func(**kwargs) - - @classmethod - def code_gen(cls, flow_fun, kwargs): - def import_func(func): - flag = 0 - if hasattr(flow.math, func): - flag = 1 - elif hasattr(flow.layers, func): - flag = 2 - elif hasattr(flow.nn, func): - flag = 3 - elif hasattr(flow, func): - flag = 4 - elif func == "api_get_variable": - return str("flow.get_variable") - - if flag == 0: - raise NotImplementedError("can not import this func:{} from oneflow".format(func)) - elif flag == 1: - return str("flow.math." + func) - elif flag == 2: - return str("flow.layers." + func) - elif flag == 3: - return str("flow.nn." + func) - elif flag == 4: - return str("flow." 
+ func) - - func = str(flow_fun).split() - func = func[1] - func = import_func(func) - - func += '(' - for k, v in kwargs.items(): - func += str(k) + '=' - if type(v) == list: - new_v = [] - for x in v: - if type(x) == oneflow._oneflow_internal.LazyConsistentBlob: - new_v.append(cls.ONEFLOW_BLOBNAME_MAP[x]) - else: - new_v.append(x) - v = new_v - func += '[' - for x in v: - func += str(x) + ', ' - func += '], ' - elif type(v) == oneflow._oneflow_internal.LazyConsistentBlob: - v = cls.ONEFLOW_BLOBNAME_MAP[v] - func += str(v) + ', ' - else: - func += str(v) + ', ' - func += ')\n' - - return func - - - -domain = BackendHandler.domain -onnx_op = BackendHandler.onnx_op -flow_func = BackendHandler.flow_func -partial_support = BackendHandler.partial_support -ps_description = BackendHandler.ps_description -oneflow_blobname_map = BackendHandler.ONEFLOW_BLOBNAME_MAP -oneflow_code_gen = BackendHandler.ONEFLOW_CODE_GEN \ No newline at end of file diff --git a/oneflow_onnx/x2oneflow/handlers/__init__.py b/oneflow_onnx/x2oneflow/handlers/__init__.py deleted file mode 100644 index 27ebf065358b8fd51842f14e5a4e394a89aeda1b..0000000000000000000000000000000000000000 --- a/oneflow_onnx/x2oneflow/handlers/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import os -import pkgutil - -__all__ = [ - modname - for _, modname, _ in pkgutil.walk_packages(path=[os.path.split(__file__)[0]]) -] diff --git a/oneflow_onnx/x2oneflow/handlers/array.py b/oneflow_onnx/x2oneflow/handlers/array.py deleted file mode 100644 index 185b56d7cf6d78e0b3f2f2f36b8325e9f64b6944..0000000000000000000000000000000000000000 --- a/oneflow_onnx/x2oneflow/handlers/array.py +++ /dev/null @@ -1,457 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import operator -from functools import reduce - -import numpy as np -import oneflow as flow - -from oneflow_onnx.x2oneflow.handler import BackendHandler -from oneflow_onnx.x2oneflow.handler import onnx_op -from oneflow_onnx.x2oneflow.handler import flow_func -import oneflow.typing as tp -from oneflow_onnx.x2oneflow.handler import oneflow_code_gen, oneflow_blobname_map - - -@onnx_op("Identity") -@flow_func(flow.identity) -class Identity(BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - -@onnx_op("Reshape") -@flow_func(flow.reshape) -class Reshape(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - init_dict = kwargs["init_dict"] - x = tensor_dict[node.input_tensor_names[0]] - if cls.SINCE_VERSION == 1: - shape = node.attrs["shape"] - else: # since_version >= 5 - shape = init_dict[node.input_tensor_names[1]] - node.attrs["shape"] = shape.tolist() - del node.input_tensor_names[1] - # TODO(daquexian)): update oneflow reshape to support 0 and np.ndarray - return [cls.run_onnx_node(node, tensor_dict, **kwargs)] - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_5(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_14(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Flatten") -class Flatten(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - shape = x.shape - axis = node.attrs.get("axis", 1) - if axis == 0: - cal_shape = (1, -1) - else: - cal_shape = ( - reduce(operator.mul, shape[:axis], 1), - reduce(operator.mul, shape[axis:]), - ) - # cal_shape = (tf.reduce_prod(shape[0:axis]), - # tf.reduce_prod(shape[axis:tf.size(shape)])) - func = '{} = flow.reshape({}, shape={})\n'.format(node.output_tensor_names[0], node.input_tensor_names[0], cal_shape) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - return flow.reshape(x, cal_shape) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Concat") -@flow_func(flow.concat) -class Concat(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - for x in node.input_tensor_names: - if tensor_dict[x] not in oneflow_blobname_map: - - func = 'weight_initializer = flow.truncated_normal(0.1)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - func = 'weight_regularizer = flow.regularizers.l2(0.0005)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - func = '{} = flow.get_variable('.format(x) - func = func + 'name={}, '.format("'"+x+"'") - func = func + 'shape={}, '.format(list(tensor_dict[x].shape)) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - oneflow_blobname_map[tensor_dict[x]] = x - inputs = [tensor_dict[inp] 
for inp in node.input_tensor_names] - return cls.run_onnx_node(node, tensor_dict, inputs=[inputs]) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_4(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Unsqueeze") -@flow_func(flow.expand_dims) -class Unsqueeze(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - axes = node.attrs.pop("axes") - if len(axes) != 1: - x = tensor_dict[node.input_tensor_names[0]] - for axis in sorted(axes): - x = flow.expand_dims(x, axis=axis) - func = '{} = flow.expand_dims({}, axis={})\n'.format(node.input_tensor_names[0], node.input_tensor_names[0], axis) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - func = '{} = {}\n'.format(node.output_tensor_names[0], node.input_tensor_names[0]) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - if x not in oneflow_blobname_map: - oneflow_blobname_map[x] = node.output_tensor_names[0] - return x - node.attrs["axis"] = axes[0] - y = cls.run_onnx_node(node, tensor_dict, **kwargs) - if y not in oneflow_blobname_map: - oneflow_blobname_map[y] = node.output_tensor_names[0] - return y - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Squeeze") -@flow_func(flow.squeeze) -class Squeeze(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - if node.attrs.get("axes"): - axes = node.attrs.pop("axes") - node.attrs["axis"] = axes - y = cls.run_onnx_node(node, tensor_dict, **kwargs) - if y not in oneflow_blobname_map: - oneflow_blobname_map[y] = node.output_tensor_names[0] - return y - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Expand") -@flow_func(flow.broadcast_like) -class Expand(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - - x = tensor_dict[node.input_tensor_names[0]] - init_dict = kwargs["init_dict"] - shape = init_dict[node.input_tensor_names[1]].tolist() - if x not in oneflow_blobname_map: - oneflow_blobname_map[x] = node.input_tensor_names[0] - - func = '{} = flow.expand({}, expand_size=[{}, {}, {}, {}])\n'.format(node.output_tensor_names[0], node.input_tensor_names[0], - shape[0], shape[1], shape[2], shape[3]) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - y = flow.expand(x, expand_size=[shape[0], shape[1], shape[2], shape[3]]) - if y not in oneflow_blobname_map: - oneflow_blobname_map[y] = node.output_tensor_names[0] - return y - - @classmethod - def version_8(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Transpose") -@flow_func(flow.transpose) -class 
Transpose(BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - -@onnx_op("Gather") -@flow_func(flow.gather) -class Gather(BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - output = cls.run_onnx_node(node, tensor_dict, **kwargs) - init_dict = kwargs["init_dict"] - if node.input_tensor_names[1] not in init_dict: - # TODO(daquexian): handle 0-d indices here - return output - else: - if len(init_dict[node.input_tensor_names[1]].shape) == 0: - output = flow.squeeze(output, axis=[node.attrs["axis"]]) - return output - - -@onnx_op("Slice") -@flow_func(flow.slice_v2) -class Slice(BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - axes = node.attrs.pop("axes", list(range(len(x.shape)))) - ends = node.attrs.pop("ends") - starts = node.attrs.pop("starts") - slice_tup_list = [] - j = 0 - for i in range(len(x.shape)): - if i in axes: - slice_tup_list.append((starts[j], ends[j], 1)) - j = j + 1 - else: - slice_tup_list.append((None, None, None)) - node.attrs["slice_tup_list"] = slice_tup_list - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_10(cls, node, tensor_dict, **kwargs): - init_dict = kwargs["init_dict"] - x = tensor_dict[node.input_tensor_names[0]] - axes = list(range(len(x.shape))) - if len(node.input_tensor_names) > 3: - axes = init_dict.get(node.input_tensor_names[3], list(range(len(x.shape)))) - steps = [1] * len(x.shape) - if len(node.input_tensor_names) > 4: - steps = init_dict.get(node.input_tensor_names[4], [1] * len(x.shape)) - starts = init_dict[node.input_tensor_names[1]] - ends = init_dict[node.input_tensor_names[2]] - slice_tup_list = [] - j = 0 - for i in range(len(x.shape)): - if i in axes: - start, end, step = int(starts[j]), int(ends[j]), int(steps[j]) - if start == np.iinfo(np.int64).max: - start = None - if end in [np.iinfo(np.int64).max, np.iinfo(np.int64).min]: - end = None - slice_tup_list.append((start, end, step)) - j = j + 1 - else: - slice_tup_list.append((None, None, None)) - node.attrs["slice_tup_list"] = slice_tup_list - node.input_tensor_names = node.input_tensor_names[:1] - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls.version_10(node, tensor_dict, **kwargs) - - -@onnx_op("Split") -class Split(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - # for code gen - if x not in oneflow_blobname_map: - oneflow_blobname_map[x] = node.input_tensor_names[0] - - axis = node.attrs.get("axis") - split = node.attrs.get("split") - index = 0 - ans = [] - if(split == None): - split = [] - x_shape = list(x.shape) - for i in range(len(node.output_tensor_names)): - split.append(x_shape[axis] // len(node.output_tensor_names)) - for i in range(len(split)): - if axis == 1: - tmp = flow.experimental.logical_slice( - x, - [ - [None, None, None], - [index, index + split[i], 1], - [None, None, None], - [None, None, None], - ], - ) - func = '{} = flow.experimental.logical_slice({}, [[None, None, None], [{}, {} + {}, 1], [None, None, None], [None, 
None, None], ], )\n'.format( - node.output_tensor_names[i], node.input_tensor_names[0], index, index, split[i]) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - elif axis == 3: - tmp = flow.experimental.logical_slice( - x, - [ - [None, None, None], - [None, None, None], - [None, None, None], - [index, index + split[i], 1], - ], - ) - func = '{} = flow.experimental.logical_slice({}, [[None, None, None], [None, None, None], [None, None, None], [{}, {} + {}, 1], ], )\n'.format( - node.output_tensor_names[i], node.input_tensor_names[0], index, index, split[i]) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - else: - raise ValueError("axis != 1 or 3 is not supported") - index += split[i] - ans.append(tmp) - for i in range(len(ans)): - if ans[i] not in oneflow_blobname_map: - oneflow_blobname_map[ans[i]] = node.output_tensor_names[i] - return ans - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_2(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Min") -class Min(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - min_v = tensor_dict[node.input_tensor_names[1]] - if node.input_tensor_names[1] in kwargs["init_dict"]: - min_v = kwargs["init_dict"][node.input_tensor_names[1]] - # elementwise min(x, c) clamps x from above, i.e. clip with max_value - return flow.math.clip_by_value(x, max_value=min_v) - return flow.math.minimum(x, min_v) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_8(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_12(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Max") -class Max(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - max_v = tensor_dict[node.input_tensor_names[1]] - if node.input_tensor_names[1] in kwargs["init_dict"]: - max_v = kwargs["init_dict"][node.input_tensor_names[1]] - # elementwise max(x, c) clamps x from below, i.e. clip with min_value - return flow.math.clip_by_value(x, min_value=max_v) - return flow.math.maximum(x, max_v) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_8(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_12(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - diff --git a/oneflow_onnx/x2oneflow/handlers/common.py b/oneflow_onnx/x2oneflow/handlers/common.py deleted file mode 100644 index
9216af1612654c977cd69e62ae30bdde98438523..0000000000000000000000000000000000000000 --- a/oneflow_onnx/x2oneflow/handlers/common.py +++ /dev/null @@ -1,241 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import copy -import oneflow -import numpy as np - -from oneflow_onnx.x2oneflow.handler import oneflow_code_gen, oneflow_blobname_map - -class BroadcastMixin(object): - @classmethod - def explicit_broadcast(cls, inputs, axis=None, tensor_dict=None): - x = tensor_dict[inputs[0]] - y = tensor_dict[inputs[1]] - - if np.prod(y.shape) == 1: - return y - - if axis is None: - return y - - total_num_dim = len(x.get_shape()) - if axis < 0: - axis += total_num_dim - - if axis + len(y.get_shape()) == total_num_dim: - return y - - dims = [axis + i for i in range(len(y.get_shape()))] - new_y = y - for i in range(total_num_dim): - if i not in dims: - raise NotImplementedError() - # new_y = tf.expand_dims(new_y, i) - return new_y - - @classmethod - def limited_broadcast(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.inputs[0]] - y = tensor_dict[node.inputs[1]] - if node.attrs.get("broadcast") == 1: - y = cls.explicit_broadcast([x, y], node.attrs.get("axis", None)) - return [cls.run_onnx_node(node, inputs=[x, y], **kwargs)] - return [cls.run_onnx_node(node, **kwargs)] - - -class ConvMixin(BroadcastMixin): - @classmethod - def conv(cls, node, input_dict, transpose=False): - """ Convolution method for both conv and transposed conv - For transposed conv, - Attr pads is not used for input, but declares how much output is padded. - Here, output means output from transposed conv which already pad output_padding if set. 
- So the pseudo explanation for output should be: - output = conv_transpose_output + output_padding - pads - And conv_transpose_output shape should be: - conv_transpose_output_shape[i] = strides[i] * (input_shape[i] - 1) + kernel_shape[i] - """ - x = input_dict[node.input_tensor_names[0]] - x_shape = list(x.shape) - x_rank = len(x_shape) - spatial_size = x_rank - 2 - - in_weights = input_dict[node.input_tensor_names[1]] - in_weights_shape = list(in_weights.shape) - - # code gen for conv weight_initializer - func = 'weight_initializer = flow.truncated_normal(0.1)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - #code gen for conv weight_regularizer - func = 'weight_regularizer = flow.regularizers.l2(0.0005)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - weights_rank = len(in_weights_shape) - if transpose: - # Translate weights from (C x M x KH x KW) to (KH x KW X M X C) - perm = list(range(2, weights_rank)) + [1, 0] - else: - # Translate weights from (M x C x KH x KW) to (KH x KW X C X M) - perm = list(range(2, weights_rank)) + [1, 0] - - if "kernel_shape" in node.attrs.keys(): - kernel_shape = node.attrs["kernel_shape"] - assert in_weights_shape[2:] == kernel_shape, ( - "kernel_shape " - "attr of convolution does not match the actual weight " - "passed to this operation, attr {}, actual {}" - ).format(kernel_shape, in_weights_shape) - else: - kernel_shape = in_weights_shape[2:] - - weights = in_weights - # code gen for conv weights - func = '{} = flow.get_variable('.format(node.input_tensor_names[1]) - func = func + 'name={}, '.format("'"+node.input_tensor_names[1]+"'") - func = func + 'shape={}, '.format(in_weights_shape) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - dilations = node.attrs.get("dilations", [1] * spatial_size) - strides = node.attrs.get("strides", [1] * spatial_size) - - pads = node.attrs.get("pads", [0, 0] * spatial_size) - pad_flag = 0 - # Check auto_pad nonexistent or NOTSET first - if "auto_pad" not in node.attrs or node.attrs["auto_pad"] == "NOTSET": - if not transpose: - if pads != [0, 0] * spatial_size: - x = PadMixin.get_padding_as_op(x, pads) - - num_dim = int(len(pads) / 2) - flow_pads = ( - np.transpose(np.array(pads).reshape([2, num_dim])).astype(np.int32).tolist() - ) - # flow_pads = [0, 0, 0, 0] + flow_pads.flatten().tolist() - flow_pads = [(0, 0), (0, 0)] + flow_pads - func = '{}_tmp_conv_pad = flow.pad({}, paddings={})\n'.format(node.input_tensor_names[0], node.input_tensor_names[0], flow_pads) - pad_flag = 1 - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - pad_mode = "VALID" - else: - pad_mode = "NOTSET" - # Then we use auto_pad to setup pad_mode - elif node.attrs["auto_pad"] == "SAME_UPPER": - pad_mode = "SAME" - elif node.attrs["auto_pad"] == "VALID": - pad_mode = "VALID" - elif node.attrs["auto_pad"] == "SAME_LOWER": - pad_mode = "SAME_LOWER" - else: - raise ValueError( - "Invalid auto_pad attribute: {}".format(node.attrs["auto_pad"]) - ) - - group = node.attrs.get("group", 1) - - conv = oneflow.nn.conv2d( - x, - weights, - padding=pad_mode, - strides=strides, - dilations=dilations, - data_format="NCHW", - groups=group, - ) - - # code gen for conv - oneflow_blobname_map[x] = node.input_tensor_names[0] - oneflow_blobname_map[weights] = node.input_tensor_names[1] - oneflow_blobname_map[conv] = node.output_tensor_names[0] - - func = '{} = 
'.format(node.output_tensor_names[0]) - func = func + 'flow.nn.conv2d(' - if pad_flag == 0: - func = func + node.input_tensor_names[0] + ', ' - else: - func = func + '{}_tmp_conv_pad'.format(node.input_tensor_names[0]) + ', ' - func = func + node.input_tensor_names[1] + ', ' - func = func + 'padding={}, '.format("'"+pad_mode+"'") - func = func + 'strides={}, '.format(strides) - func = func + 'data_format={}, '.format("'NCHW'") - func = func + 'groups={})\n'.format(group) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - if len(node.input_tensor_names) == 2: - output = conv - else: - bias = input_dict[node.input_tensor_names[2]] - output = oneflow.nn.bias_add(conv, bias) - oneflow_blobname_map[output] = node.output_tensor_names[0] - # code gen for bias_add - func = '{} = flow.get_variable('.format(node.input_tensor_names[2]) - func = func + 'name={}, '.format("'"+node.input_tensor_names[2]+"'") - func = func + 'shape={}, '.format(list(bias.shape)) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - oneflow_blobname_map[bias] = node.input_tensor_names[2] - func = '{} = '.format(node.output_tensor_names[0]) - func = func + 'flow.nn.bias_add(' - func = func + node.output_tensor_names[0] + ', ' - func = func + node.input_tensor_names[2] + ', ' - func = func + 'data_format={})\n'.format("'NCHW'") - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - return [output] - - -class PadMixin(object): - @classmethod - def get_padding_as_op(cls, x, pads): - num_dim = int(len(pads) / 2) - - flow_pads = ( - np.transpose(np.array(pads).reshape([2, num_dim])).astype(np.int32).tolist() - ) - # flow_pads = [0, 0, 0, 0] + flow_pads.flatten().tolist() - flow_pads = [(0, 0), (0, 0)] + flow_pads - - return oneflow.pad(x, flow_pads) - - -class BasicMathMixin(BroadcastMixin): - pass - - -class ArithmeticMixin(BroadcastMixin): - pass - - -class ReductionMixin(BroadcastMixin): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - attrs = copy.deepcopy(node.attrs) - axis = attrs.pop("axes", None) - if isinstance(axis, (list, tuple)) and len(axis) == 1: - axis = axis[0] - attrs["axis"] = axis - # https://github.com/onnx/onnx/issues/585 - attrs["keepdims"] = attrs.pop("keepdims", 1) == 1 - return cls.run_onnx_node(node, tensor_dict, attrs=attrs, **kwargs) diff --git a/oneflow_onnx/x2oneflow/handlers/constant.py b/oneflow_onnx/x2oneflow/handlers/constant.py deleted file mode 100644 index d1678084a9b8996ca0bf34032c47de12938d5506..0000000000000000000000000000000000000000 --- a/oneflow_onnx/x2oneflow/handlers/constant.py +++ /dev/null @@ -1,76 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import numpy as np - -from onnx import numpy_helper - -import oneflow as flow -from oneflow_onnx import util -from oneflow_onnx.x2oneflow.handler import BackendHandler -from oneflow_onnx.x2oneflow.handler import onnx_op -from oneflow_onnx.x2oneflow.handler import flow_func - -import os - - -@onnx_op("Constant") -@flow_func(flow.get_variable) -class Constant(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - attr_value = node.attrs["value"] - dtype = util.Onnx2FlowDtype(attr_value.data_type) - shape = numpy_helper.to_array(attr_value).shape - # we do not support 0d tensor - if len(shape) == 0: - shape = (1,) - return [ - cls.run_onnx_node( - node, - tensor_dict, - # inputs=[value], - # attrs={"dtype": dtype} - name=node.output_tensor_names[0], - attrs={ - "dtype": dtype, - "trainable": False, - "shape": shape, - "initializer": flow.zeros_initializer(), - }, - ) - ] - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - # either value or sparse_value - if "value" in node.attrs: - return cls._common(node, tensor_dict, **kwargs) - else: - raise NotImplementedError("sparse tensor is not supported") - - @classmethod - def version_12(cls, node, tensor_dict, **kwargs): - if "value" in node.attrs or "sparse_value" in node.attrs: - return cls.version_11(node, tensor_dict, **kwargs) - raise NotImplementedError("opset 12 constant is not supported") diff --git a/oneflow_onnx/x2oneflow/handlers/math.py b/oneflow_onnx/x2oneflow/handlers/math.py deleted file mode 100644 index 13410bbd628bddc55036438d52c15c83efd2617d..0000000000000000000000000000000000000000 --- a/oneflow_onnx/x2oneflow/handlers/math.py +++ /dev/null @@ -1,987 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -import oneflow as flow -from oneflow_onnx.x2oneflow.handler import BackendHandler -from oneflow_onnx.x2oneflow.handler import onnx_op -from oneflow_onnx.x2oneflow.handler import flow_func -from oneflow_onnx.x2oneflow.handlers.common import ArithmeticMixin, BasicMathMixin -from oneflow_onnx import util as onnx_util -from oneflow_onnx.x2oneflow.handler import oneflow_code_gen, oneflow_blobname_map - -@onnx_op("Add") -@flow_func(flow.math.add) -class Add(ArithmeticMixin, BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls.limited_broadcast(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls.limited_broadcast(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - if tensor_dict[node.input_tensor_names[1]] not in oneflow_blobname_map: - oneflow_blobname_map[tensor_dict[node.input_tensor_names[1]]] = node.input_tensor_names[1] - func = '{} = flow.get_variable('.format(node.input_tensor_names[1]) - func = func + 'name={}, '.format("'"+node.input_tensor_names[1]+"'") - func = func + 'shape={}, '.format(list(tensor_dict[node.input_tensor_names[1]].shape)) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - -@onnx_op("Sub") -@flow_func(flow.math.subtract) -class Sub(ArithmeticMixin, BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls.limited_broadcast(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls.limited_broadcast(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - if tensor_dict[node.input_tensor_names[1]] not in oneflow_blobname_map: - oneflow_blobname_map[tensor_dict[node.input_tensor_names[1]]] = node.input_tensor_names[1] - func = '{} = flow.get_variable('.format(node.input_tensor_names[1]) - func = func + 'name={}, '.format("'"+node.input_tensor_names[1]+"'") - func = func + 'shape={}, '.format(list(tensor_dict[node.input_tensor_names[1]].shape)) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - -@onnx_op("Mul") -@flow_func(flow.math.multiply) -class Mul(ArithmeticMixin, BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls.limited_broadcast(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls.limited_broadcast(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - if tensor_dict[node.input_tensor_names[1]] not in oneflow_blobname_map: - # code gen for conv weight_initializer - func = 'weight_initializer = flow.truncated_normal(0.1)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - #code gen for conv weight_regularizer - func = 'weight_regularizer = flow.regularizers.l2(0.0005)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - oneflow_blobname_map[tensor_dict[node.input_tensor_names[1]]] = node.input_tensor_names[1] - func = '{} = flow.get_variable('.format(node.input_tensor_names[1]) - func = func + 'name={}, '.format("'"+node.input_tensor_names[1]+"'") - 
func = func + 'shape={}, '.format(list(tensor_dict[node.input_tensor_names[1]].shape)) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - -@onnx_op("Div") -@flow_func(flow.math.divide) -class Div(ArithmeticMixin, BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls.limited_broadcast(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls.limited_broadcast(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - if tensor_dict[node.input_tensor_names[1]] not in oneflow_blobname_map: - oneflow_blobname_map[tensor_dict[node.input_tensor_names[1]]] = node.input_tensor_names[1] - func = '{} = flow.get_variable('.format(node.input_tensor_names[1]) - func = func + 'name={}, '.format("'"+node.input_tensor_names[1]+"'") - func = func + 'shape={}, '.format(list(tensor_dict[node.input_tensor_names[1]].shape)) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - -@onnx_op("Pow") -@flow_func(flow.math.pow) -class Pow(ArithmeticMixin, BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - y = tensor_dict[node.input_tensor_names[1]] - if y not in oneflow_blobname_map: - func = 'weight_initializer = flow.truncated_normal(0.1)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - func = 'weight_regularizer = flow.regularizers.l2(0.0005)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - func = '{} = flow.get_variable('.format(node.input_tensor_names[1]) - func = func + 'name={}, '.format("'"+node.input_tensor_names[1]+"'") - func = func + 'shape={}, '.format(list(y.shape)) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - if len(y.shape) > len(x.shape): - x = flow.math.broadcast_to_compatible_with(x, [y]) - func = '{} = flow.math.broadcast_to_compatible_with({}, [{}])\n'.format(node.input_tensor_names[0], node.input_tensor_names[0], node.input_tensor_names[1]) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - elif len(x.shape) > len(y.shape): - func = '{} = flow.math.broadcast_to_compatible_with({}, [{}])\n'.format(node.input_tensor_names[1], node.input_tensor_names[1], node.input_tensor_names[0]) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - y = flow.math.broadcast_to_compatible_with(y, [x]) - - if y not in oneflow_blobname_map: - oneflow_blobname_map[y] = node.input_tensor_names[1] - - func = '{} = flow.math.pow({}, {})\n'.format(node.output_tensor_names[0], node.input_tensor_names[0], node.input_tensor_names[1]) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - z = flow.math.pow(x, y) - if z not in oneflow_blobname_map: - oneflow_blobname_map[z] = node.output_tensor_names[0] - return z - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - return cls.version_1(node, tensor_dict, **kwargs) - - @classmethod - def version_12(cls, node, tensor_dict, **kwargs): - return cls.version_1(node, tensor_dict, **kwargs) 
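The binary-op handlers above (Add/Sub/Mul/Div/Pow) all repeat one code-generation pattern: run the op eagerly, emit an equivalent line of OneFlow source into the shared `oneflow_code_gen` list while skipping exact duplicates, and record each blob's generated variable name in `oneflow_blobname_map`. A minimal standalone sketch of that pattern is below; it assumes nothing beyond plain Python, and the names `CODE_LINES`, `BLOB_NAMES`, `emit`, and `handle_pow` are illustrative, not part of the original API.

```python
# Sketch of the dedup-and-emit pattern used by the deleted handlers.
# CODE_LINES stands in for oneflow_code_gen; BLOB_NAMES stands in for
# ONEFLOW_BLOBNAME_MAP (which keyed on blob objects, not strings).
CODE_LINES = []
BLOB_NAMES = {}

def emit(line):
    """Append a generated OneFlow source line, skipping exact duplicates."""
    if line not in CODE_LINES:
        CODE_LINES.append(line)

def handle_pow(out_name, x_name, y_name, y_shape):
    # 1. Make sure the second operand exists as a variable in the
    #    generated program (mirrors the flow.get_variable emission above).
    emit("weight_initializer = flow.truncated_normal(0.1)\n")
    emit("weight_regularizer = flow.regularizers.l2(0.0005)\n")
    emit("{} = flow.get_variable(name='{}', shape={}, "
         "initializer=weight_initializer, regularizer=weight_regularizer)\n"
         .format(y_name, y_name, list(y_shape)))
    # 2. Emit the op itself.
    emit("{} = flow.math.pow({}, {})\n".format(out_name, x_name, y_name))
    # 3. Remember which generated name the output blob maps to.
    BLOB_NAMES[out_name] = out_name

handle_pow("out_0", "x_0", "w_0", (1, 3))
handle_pow("out_1", "x_1", "w_0", (1, 3))  # initializer lines dedup here
print("".join(CODE_LINES))
```

The `if func not in oneflow_code_gen` guard that recurs throughout these handlers is exactly this `emit` dedup; it is what lets many ops share a single `weight_initializer` definition in the generated script.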
- - -@onnx_op("Tanh") -@flow_func(flow.math.tanh_v2) -class Tanh(BasicMathMixin, BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return [cls.run_onnx_node(node, tensor_dict, **kwargs)] - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return [cls.run_onnx_node(node, tensor_dict, **kwargs)] - - -@onnx_op("Sigmoid") -@flow_func(flow.math.sigmoid) -class Sigmoid(BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - -@onnx_op("HardSigmoid") -class HardSigmoid(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - # ONNX spec defaults: alpha=0.2, beta=0.5 - alpha = node.attrs.get("alpha", 0.2) - beta = node.attrs.get("beta", 0.5) - return flow.clip(x * alpha + beta, 0, 1.0) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Gemm") -class Gemm(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - y = tensor_dict[node.input_tensor_names[1]] - - # code gen for gemm B - gemm_weight_shape = list(tensor_dict[node.input_tensor_names[1]].shape) - # code gen for gemm weight_initializer - func = 'weight_initializer = flow.truncated_normal(0.1)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - # code gen for gemm weight_regularizer - func = 'weight_regularizer = flow.regularizers.l2(0.0005)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - # code gen for gemm weight_shape - # code gen for gemm weights - func = '{} = flow.get_variable('.format(node.input_tensor_names[1]) - func = func + 'name={}, '.format("'" + node.input_tensor_names[1] + "'") - func = func + 'shape={}, '.format(gemm_weight_shape) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - if len(node.input_tensor_names) > 2: - z = tensor_dict[node.input_tensor_names[2]] - oneflow_blobname_map[z] = node.input_tensor_names[2] - # code gen for gemm bias - gemm_bias_shape = list(tensor_dict[node.input_tensor_names[2]].shape) - # code gen for gemm weights - func = '{} = flow.get_variable('.format(node.input_tensor_names[2]) - func = func + 'name={}, '.format("'" + node.input_tensor_names[2] + "'") - func = func + 'shape={}, '.format(gemm_bias_shape) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - else: - z = 0 - - transA = False if node.attrs.get("transA", 0) == 0 else True - transB = False if node.attrs.get("transB", 0) == 0 else True - alpha = node.attrs.get("alpha", 1.0) - beta = node.attrs.get("beta", 1.0) - - # code gen for gemm - oneflow_blobname_map[x] = node.input_tensor_names[0] - oneflow_blobname_map[y] = node.input_tensor_names[1] - func = '{} = '.format(node.output_tensor_names[0]) - func = func + '{} * '.format(alpha) - func = func + 'flow.linalg.matmul(' - func = func +
node.input_tensor_names[0] + ', ' - func = func + node.input_tensor_names[1] + ', ' - func = func + 'transpose_a={}, '.format(transA) - func = func + 'transpose_b={}) '.format(transB) - - if z not in oneflow_blobname_map: - func = func + ' + {} * {}\n'.format(beta, z) - else: - func = func + ' + {} * {}\n'.format(beta, node.input_tensor_names[2]) - - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - return [ - alpha * flow.linalg.matmul(x, y, transpose_a=transA, transpose_b=transB) - + beta * z - ] - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("MatMul") -@flow_func(flow.linalg.matmul) -class MatMul(BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - y = tensor_dict[node.input_tensor_names[1]] - - if y not in oneflow_blobname_map: - # code gen for matmul B - matmul_weight_shape = list(tensor_dict[node.input_tensor_names[1]].shape) - # code gen for matmul weight_initializer - func = 'matmul_initializer = flow.truncated_normal(0.1)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - #code gen for matmul weight_regularizer - func = 'matmul_regularizer = flow.regularizers.l2(0.0005)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - # code gen for matmul weight_shape - # code gen for matmul weights - func = '{} = flow.get_variable('.format(node.input_tensor_names[1]) - func = func + 'name={}, '.format("'" + node.input_tensor_names[1] + "'") - func = func + 'shape={}, '.format(matmul_weight_shape) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - oneflow_blobname_map[y] = node.input_tensor_names[1] - - # TODO BBuf: add broadcast code_gen - if len(y.shape) > len(x.shape): - broadcast_shape = y.shape[:-2] + x.shape[-2:] - constant_for_broadcast = flow.constant( - value=0, dtype=flow.float32, shape=broadcast_shape - ) - func = '{}_broadcast_shape = flow.constant(value=0, dtype=flow.float32, shape={})\n'.format(node.input_tensor_names[0], broadcast_shape) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - func = '{} = flow.math.broadcast_to_compatible_with({}, [{}_broadcast_shape])\n'.format(node.input_tensor_names[0], node.input_tensor_names[0], node.input_tensor_names[0]) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - x = flow.math.broadcast_to_compatible_with(x, [constant_for_broadcast]) - if x not in oneflow_blobname_map: - oneflow_blobname_map[x] = node.input_tensor_names[0] - elif len(x.shape) > len(y.shape): - broadcast_shape = x.shape[:-2] + y.shape[-2:] - constant_for_broadcast = flow.constant( - value=0, dtype=flow.float32, shape=broadcast_shape - ) - func = '{}_broadcast_shape= flow.constant(value=0, dtype=flow.float32, 
shape={})\n'.format(node.input_tensor_names[1], broadcast_shape) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - func = '{} = flow.math.broadcast_to_compatible_with({}, [{}_broadcast_shape])\n'.format(node.input_tensor_names[1], node.input_tensor_names[1], node.input_tensor_names[1]) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - y = flow.math.broadcast_to_compatible_with(y, [constant_for_broadcast]) - if y not in oneflow_blobname_map: - oneflow_blobname_map[y] = node.input_tensor_names[1] - return cls.run_onnx_node(node, tensor_dict, inputs=(x, y), **kwargs) - - -@onnx_op("Clip") -class Clip(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - x_dtype = x.dtype - if cls.SINCE_VERSION < 11: - # min/max were required and passed as attributes - clip_value_min = node.attrs.get("min", None) - clip_value_max = node.attrs.get("max", None) - else: - # min/max are optional and passed as input_tensor_names - init_dict = kwargs["init_dict"] - clip_value_min = ( - init_dict[node.input_tensor_names[1]].item() - if len(node.input_tensor_names) > 1 and node.input_tensor_names[1] != "" - else None - ) - clip_value_max = ( - init_dict[node.input_tensor_names[2]].item() - if len(node.input_tensor_names) > 2 and node.input_tensor_names[2] != "" - else None - ) - - if x not in oneflow_blobname_map: - oneflow_blobname_map[x] = node.input_tensor_names[0] - func = '{} = flow.math.clip_by_value({}, {}, {})\n'.format(node.output_tensor_names[0], node.input_tensor_names[0], clip_value_min, clip_value_max) - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - y = flow.math.clip_by_value(x, clip_value_min, clip_value_max) - - return y - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_12(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Sqrt") -@flow_func(flow.math.sqrt) -class Sqrt(BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - -@onnx_op("Erf") -@flow_func(flow.math.erf) -class Erf(BackendHandler): - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - -@onnx_op("Cast") -@flow_func(flow.cast) -class Cast(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - dtype = onnx_util.Onnx2FlowDtype(node.attrs.pop("to")) - node.attrs["dtype"] = dtype - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Abs") -class 
Abs(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - return flow.math.abs(x) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Exp") -class Exp(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - return flow.math.exp(x) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Reciprocal") -class Reciprocal(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - return 1.0 / x - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Floor") -class Floor(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - return flow.math.floor(x) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("ArgMax") -class ArgMax(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - axis = node.attrs.get("axis") - return flow.math.argmax(x, axis=axis) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_12(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("ArgMin") -class ArgMin(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - x = flow.math.negative(x) - axis = node.attrs.get("axis") - return flow.math.argmax(x, axis=axis) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_12(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return 
cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Range") -@flow_func(flow.range) -class Range(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Greater") -@flow_func(flow.math.greater) -class Greater(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Less") -@flow_func(flow.math.less) -class Less(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Softplus") -@flow_func(flow.math.softplus) -class Softplus(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Neg") -@flow_func(flow.math.negative) -class Neg(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Ceil") -@flow_func(flow.math.ceil) -class Ceil(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Where") -@flow_func(flow.where) -class Where(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Equal") -@flow_func(flow.math.equal) -class Equal(BackendHandler): - @classmethod - def 
_common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Sign") -@flow_func(flow.math.sign) -class Sign(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("NonZero") -@flow_func(flow.nonzero) -class NonZero(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Acos") -@flow_func(flow.math.acos) -class Acos(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Acosh") -@flow_func(flow.math.acosh) -class AcosH(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_9(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Asin") -@flow_func(flow.math.asin) -class Asin(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Atan") -@flow_func(flow.math.atan) -class Atan(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Cos") -@flow_func(flow.math.cos) -class Cos(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Elu") -@flow_func(flow.nn.elu) -class Elu(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Exp") -@flow_func(flow.math.exp) -class Exp(BackendHandler): - @classmethod - def 
_common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Log") -@flow_func(flow.math.log) -class Log(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("LogSoftmax") -@flow_func(flow.nn.logsoftmax) -class LogSoftmax(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("ReduceLogSumExp") -@flow_func(flow.math.reduce_logsumexp) -class ReduceLogSumExp(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - axis = node.attrs.pop("axes") - node.attrs["axis"] = axis - keepdims = bool(node.attrs.pop("keepdims")) - node.attrs["keepdims"] = keepdims - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Round") -@flow_func(flow.math.round) -class Round(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Sin") -@flow_func(flow.math.sin) -class Sin(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("Tan") -@flow_func(flow.math.tan) -class Tan(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_7(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("Tanh") -@flow_func(flow.math.tanh) -class Tanh(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - return cls.run_onnx_node(node, tensor_dict, **kwargs) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, 
**kwargs) - - @classmethod - def version_6(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) \ No newline at end of file diff --git a/oneflow_onnx/x2oneflow/handlers/nn.py b/oneflow_onnx/x2oneflow/handlers/nn.py deleted file mode 100644 index 804053c95df4ea11679802b24ac77e7a9cc82be0..0000000000000000000000000000000000000000 --- a/oneflow_onnx/x2oneflow/handlers/nn.py +++ /dev/null @@ -1,440 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import oneflow as flow -import string -import random -import operator -from functools import reduce - -import numpy as np - -from oneflow_onnx.x2oneflow.handler import BackendHandler -from oneflow_onnx.x2oneflow.handler import flow_func -from oneflow_onnx.x2oneflow.handler import onnx_op -from oneflow_onnx.x2oneflow.handlers.common import ConvMixin -from oneflow_onnx.x2oneflow.handler import oneflow_code_gen, oneflow_blobname_map - -@onnx_op("Conv") -class Conv(ConvMixin, BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls.conv(node, tensor_dict) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls.conv(node, tensor_dict) - - -@onnx_op("BatchNormalization") -@flow_func(flow.layers.batch_normalization) -class BatchNormalization(BackendHandler): - @classmethod - def get_attrs_processor_param(cls): - return { - "default": {"epsilon": 1e-5}, - } - - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - - # code gen for batchnorm - func = 'weight_initializer = flow.truncated_normal(0.1)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - func = 'weight_regularizer = flow.regularizers.l2(0.0005)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - scale = tensor_dict[node.input_tensor_names[1]] - offset = tensor_dict[node.input_tensor_names[2]] - mean = tensor_dict[node.input_tensor_names[3]] - variance = tensor_dict[node.input_tensor_names[4]] - epsilon = node.attrs.get("epsilon", 1e-5) - - func = '{} = flow.get_variable('.format(node.input_tensor_names[1]) - func = func + 'name={}, '.format("'"+node.input_tensor_names[1]+"'") - func = func + 'shape={}, '.format(list(scale.shape)) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - func = '{} = flow.get_variable('.format(node.input_tensor_names[2]) - func = func + 'name={}, '.format("'"+node.input_tensor_names[2]+"'") - func = func + 'shape={}, '.format(list(offset.shape)) - func = func + 'initializer=weight_initializer, ' - func = func + 'regularizer=weight_regularizer)\n' - if func not in oneflow_code_gen: - oneflow_code_gen.append(func) - - func = '{} = flow.get_variable('.format(node.input_tensor_names[3]) - func = 
-        func = '{} = flow.get_variable('.format(node.input_tensor_names[3])
-        func = func + 'name={}, '.format("'"+node.input_tensor_names[3]+"'")
-        func = func + 'shape={}, '.format(list(mean.shape))
-        func = func + 'initializer=weight_initializer, '
-        func = func + 'regularizer=weight_regularizer)\n'
-        if func not in oneflow_code_gen:
-            oneflow_code_gen.append(func)
-
-        func = '{} = flow.get_variable('.format(node.input_tensor_names[4])
-        func = func + 'name={}, '.format("'"+node.input_tensor_names[4]+"'")
-        func = func + 'shape={}, '.format(list(variance.shape))
-        func = func + 'initializer=weight_initializer, '
-        func = func + 'regularizer=weight_regularizer)\n'
-        if func not in oneflow_code_gen:
-            oneflow_code_gen.append(func)
-
-        func = '{} = flow.nn.batch_normalization('.format(node.output_tensor_names[0])
-        func = func + 'x={}, mean={}, variance={}, offset={}, scale={}, axis=1, variance_epsilon={})\n'.format(node.input_tensor_names[0], node.input_tensor_names[3],
-            node.input_tensor_names[4], node.input_tensor_names[2], node.input_tensor_names[1], epsilon)
-        if func not in oneflow_code_gen:
-            oneflow_code_gen.append(func)
-        y = flow.nn.batch_normalization(x, mean=mean, variance=variance, offset=offset, scale=scale, axis=1, variance_epsilon=epsilon)
-
-        if y not in oneflow_blobname_map:
-            oneflow_blobname_map[y] = node.output_tensor_names[0]
-
-        return y
-
-    @classmethod
-    def version_1(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_6(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_7(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_9(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-
-class PoolMixin(object):
-    @classmethod
-    def pool(cls, node, input_dict, pooling_type, strict=True):
-        x = input_dict[node.input_tensor_names[0]]
-        orig_x = x
-
-        kernel_shape = node.attrs["kernel_shape"]
-
-        spatial_size = len(kernel_shape)
-        x_rank = spatial_size + 2
-
-        kernel_shape = node.attrs["kernel_shape"]
-        strides = node.attrs.get("strides", [1] * spatial_size)
-        dilations = node.attrs.get("dilations", [1] * spatial_size)
-        ceil_mode = node.attrs.get("ceil_mode")
-        pads = node.attrs.get("auto_pad", "NOTSET")
-        if pads == "NOTSET":
-            pads = node.attrs.get("pads", [0] * spatial_size * 2)
-            pads = np.reshape(pads, [2, spatial_size]).T.tolist()
-            pads = [[0, 0], [0, 0]] + pads
-
-        # oneflow does not support ceil_mode pooling yet, so this is a temporary workaround
-        if ceil_mode == 1:
-
-            if (x.shape[2] + pads[2][0] + pads[2][1] - 1) % strides[0] != 0:
-                pads[2][1] = pads[2][1] + (strides[0] - 1)
-
-            if (x.shape[3] + pads[3][0] + pads[3][1] - 1) % strides[1] != 0:
-                pads[3][1] = pads[3][1] + (strides[1] - 1)
-
-        count_include_pad = bool(node.attrs.get("count_include_pad", 0))
-        if count_include_pad != 0:
-            x = flow.pad(
-                x,
-                paddings=(
-                    (pads[0][0], pads[0][1]),
-                    (pads[1][0], pads[1][1]),
-                    (pads[2][0], pads[2][1]),
-                    (pads[3][0], pads[3][1]),
-                ),
-            )
-            func = '{} = flow.pad({}, paddings=(({}, {}), ({}, {}), ({}, {}), ({}, {})))\n'.format(node.input_tensor_names[0], node.input_tensor_names[0], pads[0][0],
-                pads[0][1], pads[1][0], pads[1][1], pads[2][0], pads[2][1], pads[3][0], pads[3][1])
-            if func not in oneflow_code_gen:
-                oneflow_code_gen.append(func)
-            pads = [[0, 0], [0, 0], [0, 0], [0, 0]]
-            # raise ValueError("count_include_pad != 0 is not supported")
-
-        pool_type = ''
-
-        if pooling_type == "AVG":
-            op = flow.nn.avg_pool2d
-            pool_type = 'flow.nn.avg_pool2d('
-        elif pooling_type == "MAX":
-            op = flow.nn.max_pool2d
-            pool_type = 'flow.nn.max_pool2d('
-        elif pooling_type == "MAX_WITH_ARGMAX":
-            raise ValueError("maxpooling with argmax is not supported")
-
-        if spatial_size != 2:
-            raise ValueError("non-2d pooling is not supported")
-        if node.attrs.get("storage_order", 0) != 0:
-            raise ValueError("storage_order != 0 is not supported")
-
-        # code gen for avgpool2d and maxpool2d pool
-        oneflow_blobname_map[x] = node.input_tensor_names[0]
-
-        func = '{} = '.format(node.output_tensor_names[0])
-        func = func + pool_type
-        func = func + node.input_tensor_names[0] + ', '
-        func = func + 'ksize={}, '.format(kernel_shape)
-        func = func + 'strides={}, '.format(strides)
-        func = func + 'padding=(({}, {}), ({}, {}), ({}, {}), ({}, {})), '.format(pads[0][0],
-            pads[0][1], pads[1][0], pads[1][1], pads[2][0], pads[2][1], pads[3][0], pads[3][1])
-        func = func + 'data_format={})\n'.format("'NCHW'")
-        if func not in oneflow_code_gen:
-            oneflow_code_gen.append(func)
-
-        y = op(
-            x, ksize=kernel_shape, strides=strides, padding=pads, data_format="NCHW"
-        )
-        if y not in oneflow_blobname_map:
-            oneflow_blobname_map[y] = node.output_tensor_names[0]
-        return y
-
-
-@onnx_op("AveragePool")
-class AveragePool(PoolMixin, BackendHandler):
-    @classmethod
-    def _common(cls, node, tensor_dict, **kwargs):
-        return cls.pool(node, tensor_dict, "AVG", kwargs.get("strict", True))
-
-    @classmethod
-    def version_1(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_7(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_10(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_11(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-
-@onnx_op("MaxPool")
-class MaxPool(PoolMixin, BackendHandler):
-    @classmethod
-    def _common(cls, node, tensor_dict, **kwargs):
-        pool_type = "MAX" if len(node.output_tensor_names) == 1 else "MAX_WITH_ARGMAX"
-        return cls.pool(node, tensor_dict, pool_type, kwargs.get("strict", True))
-
-    @classmethod
-    def version_1(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_8(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_10(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_11(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_12(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-
-@onnx_op("Relu")
-@flow_func(flow.math.relu)
-class Relu(BackendHandler):
-    @classmethod
-    def version_1(cls, node, tensor_dict, **kwargs):
-        return cls.run_onnx_node(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_6(cls, node, tensor_dict, **kwargs):
-        return cls.run_onnx_node(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_13(cls, node, tensor_dict, **kwargs):
-        return cls.run_onnx_node(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_14(cls, node, tensor_dict, **kwargs):
-        return cls.run_onnx_node(node, tensor_dict, **kwargs)
-
-
-@onnx_op("Pad")
-@flow_func(flow.pad)
-class Pad(BackendHandler):
-    @classmethod
-    def _common(cls, node, tensor_dict, **kwargs):
-        x = tensor_dict[node.input_tensor_names[0]]
-        mode = node.attrs.pop("mode", "constant")
-        if mode != "constant":
-            raise NotImplementedError('Padding mode "{}" is not supported'.format(mode))
-
-        if cls.SINCE_VERSION < 11:  # for opset 1 and opset 2
-            node.attrs["paddings"] = node.attrs.pop("pads", None)
-            node.attrs["constant_value"] = node.attrs.pop("value", 0.0)
-
-        else:  # for opset 11
-            init_dict = kwargs["init_dict"]
-            paddings = (
-                init_dict[node.input_tensor_names[1]]
-                .reshape(2, -1)
-                .transpose((1, 0))
-                .tolist()
-            )
-            constant_values = (
-                init_dict[node.input_tensor_names[2]].item()
-                if len(node.input_tensor_names) == 3
-                else 0
-            )
-
-        return [
-            cls.run_onnx_node(
-                node, tensor_dict, inputs=[x, paddings, constant_values], **kwargs
-            )
-        ]
-
-    @classmethod
-    def version_1(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_2(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_11(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-
-@onnx_op("GlobalMaxPool")
-class GlobalMaxPool(BackendHandler):
-    @classmethod
-    def version_1(cls, node, tensor_dict, **kwargs):
-        x = tensor_dict[node.input_tensor_names[0]]
-        spatial_dims = list(range(2, len(x.shape)))
-        return flow.math.reduce_max(x, spatial_dims, keepdims=True)
-
-
-@onnx_op("GlobalAveragePool")
-class GlobalAverageMaxPool(BackendHandler):
-    @classmethod
-    def version_1(cls, node, tensor_dict, **kwargs):
-        x = tensor_dict[node.input_tensor_names[0]]
-        spatial_dims = list(range(2, len(x.shape)))
-        func = '{} = flow.math.reduce_mean({}, axis={}, keepdims=True)\n'.format(node.output_tensor_names[0], node.input_tensor_names[0], spatial_dims)
-        if func not in oneflow_code_gen:
-            oneflow_code_gen.append(func)
-        y = flow.math.reduce_mean(x, spatial_dims, keepdims=True)
-        if y not in oneflow_blobname_map:
-            oneflow_blobname_map[y] = node.output_tensor_names[0]
-        return y
-
-
-@onnx_op("Softmax")
-class Softmax(BackendHandler):
-    @classmethod
-    def _common(cls, node, tensor_dict, **kwargs):
-        x = tensor_dict[node.input_tensor_names[0]]
-        axis = node.attrs.get("axis", 1)
-        axis = axis if axis >= 0 else len(np.shape(x)) + axis
-
-        if x not in oneflow_blobname_map:
-            oneflow_blobname_map[x] = node.input_tensor_names[0]
-
-        if axis == len(np.shape(x)) - 1:
-            func = '{} = flow.nn.softmax({})\n'.format(node.output_tensor_names[0], node.input_tensor_names[0])
-            if func not in oneflow_code_gen:
-                oneflow_code_gen.append(func)
-            return flow.nn.softmax(x)
-
-        shape = x.shape
-        cal_shape = (
-            reduce(operator.mul, shape[0:axis], 1),
-            reduce(operator.mul, shape[axis : len(shape)], 1),
-        )
-        func = '{} = flow.reshape({}, {})\n'.format(node.input_tensor_names[0], node.input_tensor_names[0], cal_shape)
-        if func not in oneflow_code_gen:
-            oneflow_code_gen.append(func)
-        func = '{} = flow.reshape(flow.nn.softmax({}), {})'.format(node.output_tensor_names[0], node.input_tensor_names[0], shape)
-        if func not in oneflow_code_gen:
-            oneflow_code_gen.append(func)
-
-        x = flow.reshape(x, cal_shape)
-
-        return flow.reshape(flow.nn.softmax(x), shape)
-
-    @classmethod
-    def version_1(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_11(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-@onnx_op("LeakyRelu")
-@flow_func(flow.nn.leaky_relu)
-class LeakyRelu(BackendHandler):
-    @classmethod
-    def _common(cls, node, tensor_dict, **kwargs):
-        return cls.run_onnx_node(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_1(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_6(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-@onnx_op("PRelu")
-@flow_func(flow.layers.prelu)
-class PRelu(BackendHandler):
-
-    @classmethod
-    def _common(cls, node, tensor_dict, **kwargs):
-        name = node.input_tensor_names[0]
-
-        cls.copy_variable_file(node.input_tensor_names[1], name + "-alpha")
-        node.input_tensor_names = node.input_tensor_names[:1]
-
-        return [
-            cls.run_onnx_node(node, tensor_dict, name=name, **kwargs, attrs={"shared_axes": [2, 3]})
-        ]
-
-    @classmethod
-    def version_1(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_6(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_7(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
-
-    @classmethod
-    def version_9(cls, node, tensor_dict, **kwargs):
-        return cls._common(node, tensor_dict, **kwargs)
\ No newline at end of file
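One detail of the deleted `PoolMixin.pool` above is worth calling out: oneflow had no native `ceil_mode` pooling at the time, so the handler emulated it by widening the trailing pad whenever the window arithmetic did not divide evenly. A self-contained sketch of that adjustment (the function name and sample sizes here are hypothetical):

```python
# Sketch of the ceil_mode workaround above: grow the trailing pad by
# stride - 1 when the (padded) extent does not tile under the stride, so
# floor-mode pooling yields the same output size ceil_mode would.
def adjust_trailing_pad(extent, pad_begin, pad_end, stride):
    if (extent + pad_begin + pad_end - 1) % stride != 0:
        pad_end += stride - 1
    return pad_end

print(adjust_trailing_pad(7, 0, 0, 2))  # 0: (7 - 1) % 2 == 0, nothing to do
print(adjust_trailing_pad(8, 0, 0, 2))  # 1: one extra trailing pad row needed
```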
diff --git a/oneflow_onnx/x2oneflow/handlers/reduce.py b/oneflow_onnx/x2oneflow/handlers/reduce.py
deleted file mode 100644
index 483170f5c59a3398dc7219afd87db651dfa37c2b..0000000000000000000000000000000000000000
--- a/oneflow_onnx/x2oneflow/handlers/reduce.py
+++ /dev/null
@@ -1,116 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-""" -import operator -from functools import reduce - -import numpy as np - -from oneflow_onnx.x2oneflow.handler import BackendHandler -from oneflow_onnx.x2oneflow.handler import onnx_op -from oneflow_onnx.x2oneflow.handler import flow_func -from oneflow_onnx.x2oneflow.handlers.common import ReductionMixin -import oneflow as flow - - -@onnx_op("ReduceMean") -@flow_func(flow.math.reduce_mean) -class ReduceMean(ReductionMixin, BackendHandler): - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - -@onnx_op("ReduceMax") -class ReduceMax(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - axes = node.attrs.get("axes") - keepdims = node.attrs.get("keepdims") - return flow.math.reduce_max(x, axis=axes, keepdims=bool(keepdims)) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_12(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - -@onnx_op("ReduceMin") -class ReduceMin(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - axes = node.attrs.get("axes") - keepdims = node.attrs.get("keepdims") - return flow.math.reduce_min(x, axis=axes, keepdims=bool(keepdims)) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_12(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - - -@onnx_op("ReduceProd") -@flow_func(flow.math.reduce_prod) -class ReduceProd(BackendHandler): - @classmethod - def _common(cls, node, tensor_dict, **kwargs): - x = tensor_dict[node.input_tensor_names[0]] - axes = node.attrs.get("axes") - keepdims = node.attrs.get("keepdims") - return flow.math.reduce_prod(x, axis=axes, keepdims=bool(keepdims)) - - @classmethod - def version_1(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_11(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) - - @classmethod - def version_13(cls, node, tensor_dict, **kwargs): - return cls._common(node, tensor_dict, **kwargs) diff --git a/oneflow_onnx/x2oneflow/onnx2flow.py b/oneflow_onnx/x2oneflow/onnx2flow.py deleted file mode 100644 index 4f5262c0858a120263dff8859a060e90d6c14739..0000000000000000000000000000000000000000 --- a/oneflow_onnx/x2oneflow/onnx2flow.py +++ /dev/null @@ -1,600 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
diff --git a/oneflow_onnx/x2oneflow/onnx2flow.py b/oneflow_onnx/x2oneflow/onnx2flow.py
deleted file mode 100644
index 4f5262c0858a120263dff8859a060e90d6c14739..0000000000000000000000000000000000000000
--- a/oneflow_onnx/x2oneflow/onnx2flow.py
+++ /dev/null
@@ -1,600 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-try:
-    from itertools import izip as zip
-except ImportError:  # will be 3.x series
-    pass
-
-import copy
-from onnx import defs
-from onnx import numpy_helper
-from onnx.backend.base import Backend
-from onnx.backend.base import Device
-from onnx.backend.base import namedtupledict
-from onnx.helper import make_opsetid
-import oneflow as flow
-
-from oneflow_onnx import util
-from oneflow_onnx.x2oneflow.handler import BackendHandler
-
-from oneflow_onnx.x2oneflow.handlers import *
-from oneflow_onnx.onnx_wrapper import Node as OnnxNode
-from oneflow_onnx.x2oneflow.handler import oneflow_code_gen, oneflow_blobname_map
-import io
-import tempfile
-import os
-import shutil
-import numpy as np
-import onnx
-import torch
-import paddle
-import tensorflow as tf
-import tf2onnx
-import logging
-import onnxoptimizer
-
-try:
-    import onnxsim
-
-    has_onnxsim = True
-except ImportError:
-    has_onnxsim = False
-
-logger = logging.getLogger(__name__)
-
-
-def from_onnx(
-    onnx_model: onnx.ModelProto, inputs, model_weight_dir="/tmp/tmp", do_onnxsim=True, from_tf2=False, from_paddle=False, from_pytorch=False,
-):
-    oneflow_code_gen = []
-    oneflow_blobname_map = dict()
-    input_names = [x.name for x in onnx_model.graph.input]
-    if type(inputs) is not dict:
-        assert (
-            len(input_names) == 1
-        ), "Please use input dict if the model has multiple inputs"
-        inputs = {input_names[0]: inputs}
-    if do_onnxsim and has_onnxsim:
-        dict(zip(input_names, [x.shape for x in inputs.values()]))
-        onnx_model, _ = onnxsim.simplify(
-            onnx_model,
-            skip_fuse_bn=False,
-            skip_shape_inference=False,
-            input_shapes=dict(zip(input_names, [x.shape for x in inputs.values()])),
-        )
-    elif do_onnxsim:
-        logger.info(
-            "We recommend installing onnx-simplifier so that OneFlow can remove the redundant ONNX nodes"
-        )
-
-    initializer_name = []
-    if from_tf2:
-        for x in onnx_model.graph.input:
-            x.name = x.name.replace('/', '_')
-            x.name = x.name.replace(':', '_')
-        for i, node in enumerate(onnx_model.graph.node):
-            node.name = node.name.replace('/', '_')
-            node.name = node.name.replace(':', '_')
-            for j in range(len(node.input)):
-                node.input[j] = node.input[j].replace('/', '_')
-                node.input[j] = node.input[j].replace(':', '_')
-            for j in range(len(node.output)):
-                node.output[j] = node.output[j].replace('/', '_')
-                node.output[j] = node.output[j].replace(':', '_')
-        for x in onnx_model.graph.initializer:
-            x.name = x.name.replace('/', '_')
-            x.name = x.name.replace(':', '_')
-            initializer_name.append(x.name)
-        # to solve tf batchnorm without scale params
-        delete_node_name = []
-        for i, node in enumerate(onnx_model.graph.node):
-            if node.op_type == "BatchNormalization":
-                if node.input[1] in initializer_name:
-                    pass
-                else:
-                    delete_node_name.append(node.input[1])
-
-        for i, x in enumerate(onnx_model.graph.input):
-            if x.name in delete_node_name:
-                tensor_dim = onnx_model.graph.input[i].type.tensor_type.shape.dim
-                new_bn_value = []
-                for j in range(int(tensor_dim[0].dim_value)):
-                    new_bn_value.append(1.0)
-                new_bn_scale_node = onnx.helper.make_tensor(name=x.name, data_type=onnx.TensorProto.FLOAT, dims=(int(tensor_dim[0].dim_value),), vals=new_bn_value)
-                onnx_model.graph.initializer.extend([new_bn_scale_node])
-
-        for x in onnx_model.graph.input:
-            if x.name in delete_node_name:
-                onnx_model.graph.input.remove(x)
-
-    # to solve paddlepaddle2oneflow initializer rename bug
-    if from_paddle == True:
-
-        graph_input_name = {}
-        graph_initializer_name = []
-        for x in onnx_model.graph.initializer:
-            graph_initializer_name.append(x.name)
-
-        for i, node in enumerate(onnx_model.graph.node):
-            # node_cp = node
-            node_cp = copy.deepcopy(node)
-            for j in range(len(node.input)):
-                if node.input[j] in graph_initializer_name:
-                    node_cp.input[j] = node.name + "_" + node.input[j]
-                    graph_input_name[node_cp.input[j]] = node.input[j]
-            onnx_model.graph.node.remove(node)
-            onnx_model.graph.node.insert(i, node_cp)
-
-        extend_op = []
-        for k, v in graph_input_name.items():
-            for x in onnx_model.graph.initializer:
-                base_name = x.name
-                if x.name == v:
-                    x.name = k
-                    for k2, v2 in graph_input_name.items():
-                        if v2 == base_name and k2 != k:
-                            x_cp = copy.deepcopy(x)
-                            x_cp.name = k2
-                            extend_op.append(x_cp)
-            for x in onnx_model.graph.input:
-                if x.name == v:
-                    onnx_model.graph.input.remove(x)
-        for x in extend_op:
-            onnx_model.graph.initializer.extend([x])
-
-    # for code gen
-    for x in onnx_model.graph.input:
-        x.name = x.name.replace('.', '_')
-        x.name = x.name.replace('/', '_')
-        x.name = x.name.replace(':', '_')
-    for i, node in enumerate(onnx_model.graph.node):
-        node.name = node.name.replace('.', '_')
-        node.name = node.name.replace('/', '_')
-        node.name = node.name.replace(':', '_')
-        for j in range(len(node.input)):
-            node.input[j] = node.input[j].replace('.', '_')
-            node.input[j] = node.input[j].replace('/', '_')
-            node.input[j] = node.input[j].replace(':', '_')
-        for j in range(len(node.output)):
-            node.output[j] = node.output[j].replace('.', '_')
-            node.output[j] = node.output[j].replace('/', '_')
-            node.output[j] = node.output[j].replace(':', '_')
-    for x in onnx_model.graph.initializer:
-        x.name = x.name.replace('.', '_')
-        x.name = x.name.replace('/', '_')
-        x.name = x.name.replace(':', '_')
-    for x in onnx_model.graph.output:
-        x.name = x.name.replace('.', '_')
-        x.name = x.name.replace('/', '_')
-        x.name = x.name.replace(':', '_')
-
-    graph_initializer_name = []
-    for x in onnx_model.graph.initializer:
-        graph_initializer_name.append(x.name)
-    graph_name_dict = {}
-    rename_set = []
-    for i, node in enumerate(onnx_model.graph.node):
-        # node_cp = node
-        node_cp = copy.deepcopy(node)
-        if node.name == '':
-            cnt = 0
-            while True:
-                node.name = node.op_type + '_{}'.format(cnt)
-                if node.name in rename_set:
-                    pass
-                else:
-                    rename_set.append(node.name)
-                    break
-                cnt = cnt + 1
-        for j in range(len(node.input)):
-            if node.input[j] == 'x_0':
-                node_cp.input[j] = node.input[j]
-            elif node.input[j] in graph_name_dict:
-                node_cp.input[j] = graph_name_dict[node.input[j]]
-            else:
-                if node.op_type == "Clip" and (node.input[j] not in graph_initializer_name):
-                    pass
-                else:
-                    node_cp.input[j] = node.name.lower() + '_input_{}'.format(j)
-                    graph_name_dict[node.input[j]] = node_cp.input[j]
-        for j in range(len(node.output)):
-            if node.output[j] in graph_name_dict:
-                node_cp.output[j] = graph_name_dict[node.output[j]]
-            else:
-                node_cp.output[j] = node.name.lower() + '_output_{}'.format(j)
-                graph_name_dict[node.output[j]] = node_cp.output[j]
-
-        onnx_model.graph.node.remove(node)
-        onnx_model.graph.node.insert(i, node_cp)
-
-    for x in onnx_model.graph.input:
-        if x.name in graph_name_dict:
-            x.name = graph_name_dict[x.name]
-    for x in onnx_model.graph.output:
-        if x.name in graph_name_dict:
-            x.name = graph_name_dict[x.name]
-    for x in onnx_model.graph.initializer:
-        if x.name in graph_name_dict:
-            x.name = graph_name_dict[x.name]
-
-    onnx_model = onnx.shape_inference.infer_shapes(onnx_model)
-
-    # to save onnx model after onnx_simplifier
-    if not os.path.exists("/tmp"):
-        os.makedirs("/tmp")
-    onnx.save(onnx_model, "/tmp/simp.onnx")
-
-    if os.path.exists(model_weight_dir):
-        shutil.rmtree(model_weight_dir)
-    BackendHandler.WEIGHT_SAVE_DIR = model_weight_dir
-
-    for x in onnx_model.graph.initializer:
-        dir_name = os.path.join(model_weight_dir, x.name)
-        if not os.path.exists(dir_name):
-            os.makedirs(dir_name)
-        with open(os.path.join(dir_name, "out"), "wb") as f:
-            value = numpy_helper.to_array(x)
-            f.write(value.tobytes())
-    for node in onnx_model.graph.node:
-        node = OnnxNode(node)
-        if node.op_type == "Constant":
-            attr_value = node.attrs["value"]
-            value = numpy_helper.to_array(attr_value)
-            # we do not support 0d tensor
-            if len(value.shape) == 0:
-                value = np.reshape(value, (1,))
-            dir_name = os.path.join(model_weight_dir, node.output_tensor_names[0])
-            if not os.path.exists(dir_name):
-                os.makedirs(dir_name)
-            with open(os.path.join(dir_name, "out"), "wb") as f:
-                f.write(value.tobytes())
-
-    def write_fake_data(var_name, value):
-        dir_name = os.path.join(model_weight_dir, var_name)
-        if not os.path.exists(dir_name):
-            os.makedirs(dir_name)
-        with open(os.path.join(dir_name, "out"), "wb") as f:
-            f.write(value.tobytes())
-
-    train_step_name = "System-Train-TrainStep-temp_job"
-    write_fake_data(train_step_name, np.array([0]))
-    write_fake_data("v1", np.array([0], dtype=np.float32))
-
-    d = prepare(onnx_model, blob_dict=inputs)
-    output_names = [x.name for x in onnx_model.graph.output]
-    if len(output_names) == 1:
-        return d[output_names[0]]
-    return {output_name: d[output_name] for output_name in output_names}
-
-
-def from_pytorch(
-    torch_model, inputs, model_weight_dir="/tmp", do_onnxsim=True, train_flag=True
-):
-    if type(inputs) is not list:
-        inputs = [inputs]
-    input_names = ["x_{}".format(i) for i in range(len(inputs))]
-
-    torch_model = torch_model.to("cpu")
-
-    f = io.BytesIO()
-    torch.onnx.export(
-        torch_model,
-        tuple([torch.zeros(ipt.shape) for ipt in inputs]),
-        f,
-        input_names=input_names,
-        opset_version=12,
-        training=train_flag,
-    )
-    model_str = f.getvalue()
-    onnx_model = onnx.load_model_from_string(model_str)
-    return from_onnx(
-        onnx_model,
-        dict(zip(input_names, inputs)),
-        model_weight_dir=model_weight_dir,
-        do_onnxsim=do_onnxsim,
-        from_pytorch=True,
-    )
-
-
-def from_paddle(
-    paddle_model, inputs, model_weight_dir="/tmp", do_onnxsim=True, train_flag=True
-):
-    input_names = "x_0"
-    paddle_model.eval()
-    input_spec = paddle.static.InputSpec(
-        shape=inputs.shape, dtype="float32", name=input_names
-    )
-
-    mode_str = "/tmp/tmp"
-
-    paddle.onnx.export(
-        paddle_model,
-        mode_str,
-        input_spec=[input_spec],
-        opset_version=12,
-        enable_onnx_checker=True,
-    )
-
-    onnx_model = onnx.load(str(mode_str + ".onnx"))
-
-    return from_onnx(
-        onnx_model,
-        dict(zip([input_names], [inputs])),
-        model_weight_dir=model_weight_dir,
-        do_onnxsim=do_onnxsim,
-        from_paddle=True,
-    )
-
-
-def from_tensorflow2(
-    tf_model, inputs, model_weight_dir="/tmp", do_onnxsim=True, train_flag=True
-):
-    input_names = "x_0"
-    # input_spec = paddle.static.InputSpec(
-    #     shape=inputs.shape, dtype="float32", name=input_names
-    # )
-    spec = (tf.TensorSpec(inputs.shape, tf.float32, name=input_names),)
-
-    mode_str = "/tmp/tmp.onnx"
-
-    model_proto, _ = tf2onnx.convert.from_keras(
-        tf_model, input_signature=spec, opset=11, output_path=mode_str
-    )
-
-    return from_onnx(
-        model_proto,
-        dict(zip([input_names], [inputs])),
-        model_weight_dir=model_weight_dir,
-        do_onnxsim=do_onnxsim,
-        from_tf2=True,
-    )
-
-
-def get_all_backend_handlers(opset_dict):
-    """ Get a dict of all backend handler classes.
-    e.g. {'domain': {'Abs': Abs handler class}, ...}.
-    :param opset_dict: A dict of opset. e.g. {'domain': version, ...}
-    :return: Dict.
-    """
-    handlers = {}
-    for handler in BackendHandler.__subclasses__():
-        handler.check_cls()
-
-        domain = handler.DOMAIN
-        version = opset_dict[domain]
-        handler.VERSION = version
-
-        since_version = 1
-        if defs.has(handler.ONNX_OP, domain=handler.DOMAIN):
-            try:
-                since_version = defs.get_schema(
-                    handler.ONNX_OP,
-                    domain=handler.DOMAIN,
-                    max_inclusive_version=version,
-                ).since_version
-            except RuntimeError:
-                logger.info(
-                    "Fail to get since_version of {} in domain `{}` "
-                    "with max_inclusive_version={}. Set to 1.".format(
-                        handler.ONNX_OP, handler.DOMAIN, version
-                    )
-                )
-        else:
-            logger.info(
-                "Unknown op {} in domain `{}`.".format(
-                    handler.ONNX_OP, handler.DOMAIN or "ai.onnx"
-                )
-            )
-        handler.SINCE_VERSION = since_version
-        handlers.setdefault(domain, {})[handler.ONNX_OP] = handler
-    return handlers
-
-
-class OneflowBackend(Backend):
-    """ Oneflow Backend for ONNX
-    """
-
-    @classmethod
-    def prepare(
-        cls,
-        model,
-        device="CPU",
-        strict=True,
-        logging_level="INFO",
-        blob_dict=None,
-        **kwargs
-    ):
-        """Prepare an ONNX model for Oneflow Backend.
-        :param model: The ONNX model to be converted.
-        :param device: The device to execute this model on.
-        :param strict: Whether to enforce semantic equivalence between the original model
-            and the converted oneflow model, defaults to True (yes, enforce semantic equivalence).
-            Changing to False is strongly discouraged.
-            Currently, the strict flag only affects the behavior of MaxPool and AveragePool ops.
-        :param logging_level: The logging level, default is INFO. Change it to DEBUG
-            to see more conversion details or to WARNING to see less.
-        :returns: The variable dict of the converted oneflow model
-        """
-        super(OneflowBackend, cls).prepare(model, device, **kwargs)
-        logger.setLevel(logging_level)
-
-        return cls.onnx_model_to_oneflow(model, strict, blob_dict=blob_dict)
-
-    @classmethod
-    def onnx_model_to_oneflow(cls, model, strict, blob_dict=None):
-        """ Convert ONNX model to oneflow.
-        :param model: ONNX ModelProto object.
-        :param strict: whether to enforce semantic equivalence between the original model
-            and the converted oneflow model.
-        :return: The variable dict of the converted oneflow model
-        """
-
-        # Models with IR_VERSION less than 3 do not have opset_import set.
-        # We default to minimum opset, this behavior is consistent with
-        # onnx checker.
-        # c.f. https://github.com/onnx/onnx/blob/427ac0c1b792363d373e3d7e4eef97fa46458420/onnx/checker.cc#L478
-        if model.ir_version < 3:
-            opset_import = [make_opsetid(defs.ONNX_DOMAIN, 1)]
-        else:
-            opset_import = model.opset_import
-        return cls._onnx_graph_to_oneflow(
-            model.graph, opset_import, strict, blob_dict=blob_dict
-        )
-
-    @classmethod
-    def _onnx_graph_to_oneflow(cls, graph_def, opset, strict, blob_dict=None):
-        """ Convert ONNX graph to oneflow.
-        :param graph_def: ONNX GraphProto object.
-        :param opset: ONNX OperatorSetIdProto list.
-        :param strict: whether to enforce semantic equivalence between the original model
-            and the converted oneflow.
-        :param blob_dict: {name: oneflow_blob}, the inputs of onnx graph will be populated with oneflow_blob with the same name
-        :return: The variable dict of the converted oneflow model
-        """
-        if blob_dict is None:
-            blob_dict = {}
-        handlers = cls._get_handlers(opset)
-
-        # initializer: TensorProtos representing the values to initialize
-        # a given tensor.
-        # initialized: A list of names of the initialized tensors.
-        if graph_def.initializer:
-            input_dict_items = cls._onnx_initializer_to_input_dict_items(
-                graph_def.initializer
-            )
-            initialized = {
-                init.name: onnx.numpy_helper.to_array(init)
-                for init in graph_def.initializer
-            }
-        else:
-            input_dict_items = []
-            initialized = {}
-
-        for node in graph_def.node:
-            node = OnnxNode(node)
-            if node.op_type == "Constant":
-                initialized[node.output_tensor_names[0]] = numpy_helper.to_array(
-                    node.attrs["value"]
-                )
-
-        # creating placeholders for currently unknown inputs
-        for value_info in graph_def.input:
-            if value_info.name in initialized:
-                continue
-            shape = list(
-                d.dim_value if (d.dim_value > 0 and d.dim_param == "") else None
-                for d in value_info.type.tensor_type.shape.dim
-            )
-            if value_info.name not in blob_dict:
-                raise NotImplementedError("no blob named {}".format(value_info.name))
-            input_dict_items.append((value_info.name, blob_dict[value_info.name]))
-
-        # tensor dict: this dictionary is a map from variable names
-        # to the latest produced oneflow variables of the given name.
-        # This dictionary will get updated as we build the graph to
-        # record the names of newly produced tensors.
-        tensor_dict = dict(input_dict_items)
-        # Since tensor dict may be updated, we need to keep a copy
-        # of the original input dict where we track the earliest
-        # defined tensors so we can have access to the placeholders
-        # to feed in input tensors when we run the graph.
-        input_dict = dict(input_dict_items)
-
-        for node in graph_def.node:
-            onnx_node = OnnxNode(node)
-            output_ops = cls._onnx_node_to_oneflow_op(
-                onnx_node,
-                tensor_dict,
-                initialized,
-                handlers,
-                opset=opset,
-                strict=strict,
-            )
-            curr_node_output_map = dict(zip(onnx_node.output_tensor_names, output_ops))
-            tensor_dict.update(curr_node_output_map)
-        return tensor_dict
-
-    @classmethod
-    def _onnx_initializer_to_input_dict_items(cls, initializer):
-        """ Convert ONNX graph initializer to input dict items.
-        :param initializer: ONNX graph initializer, list of TensorProto.
-        :return: List of input dict items.
-        """
-
-        def get_flow_shape(shape):
-            if len(shape) == 0:
-                return (1,)
-            return shape
-
-        return [
-            (
-                init.name,
-                flow.get_variable(
-                    name=init.name,
-                    shape=get_flow_shape(list(init.dims)),
-                    initializer=flow.zeros_initializer(),
-                    trainable=True,
-                    dtype=util.Onnx2FlowDtype(init.data_type),
-                ),
-            )
-            for init in initializer
-        ]
-
-    @classmethod
-    def _onnx_node_to_oneflow_op(
-        cls, node, tensor_dict, init_dict, handlers=None, opset=None, strict=True
-    ):
-        """
-        Convert onnx node to oneflow op.
-        Args:
-            node: Onnx node object.
-            tensor_dict: Tensor dict of graph.
-            opset: Opset version of the operator set. Default 0 means using latest version.
-            strict: whether to enforce semantic equivalence between the original model
-                and the converted oneflow model, defaults to True (yes, enforce semantic equivalence).
-                Changing to False is strongly discouraged.
-        Returns:
-            oneflow op
-        """
-        handlers = handlers or cls._get_handlers(opset)
-        handler = handlers[node.domain].get(node.op_type, None)
-        if handler:
-            output = handler.handle(
-                node, tensor_dict, init_dict=init_dict, strict=strict
-            )
-            if not isinstance(output, (list, tuple)):
-                output = [output]
-            return output
-        else:
-            raise ValueError("{} is not supported".format(node.op_type))
-
-    @classmethod
-    def _get_handlers(cls, opset):
-        """ Get all backend handlers with opset.
-        :param opset: ONNX OperatorSetIdProto list.
-        :return: All backend handlers.
-        """
-        opset = opset or [make_opsetid(defs.ONNX_DOMAIN, defs.onnx_opset_version())]
-        opset_dict = dict([(o.domain, o.version) for o in opset])
-        return get_all_backend_handlers(opset_dict)
-
-
-prepare = OneflowBackend.prepare
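Taken together, `from_onnx` and `OneflowBackend.prepare` above replay an ONNX graph as oneflow ops and hand back the resulting blob(s). The deleted `util.py` below drives it roughly like this (condensed from that file; `MyTorchModule` is a placeholder for any `torch.nn.Module` class, and the test harness's `trick` variable is omitted):

```python
import numpy as np
import oneflow as flow
import oneflow.typing as tp

@flow.global_function(type="predict")
def job_eval(x: tp.Numpy.Placeholder((2, 4, 3, 5))) -> tp.Numpy:
    # Export the module to ONNX in memory, then rebuild the graph with
    # oneflow ops; the returned blob is the converted model's output.
    return from_pytorch(MyTorchModule(), x, model_weight_dir="/tmp/oneflow")

flow.train.CheckPoint().load("/tmp/oneflow")
out = job_eval(np.random.rand(2, 4, 3, 5).astype(np.float32))
```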
diff --git a/oneflow_onnx/x2oneflow/util.py b/oneflow_onnx/x2oneflow/util.py
deleted file mode 100644
index a0bf98fdf0d9c95c6fcbbff2a8759c94d5dbf774..0000000000000000000000000000000000000000
--- a/oneflow_onnx/x2oneflow/util.py
+++ /dev/null
@@ -1,365 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from collections import OrderedDict
-import tempfile
-import os
-import shutil
-
-import numpy as np
-import onnxruntime as ort
-import onnx
-import torch
-import paddle
-import tensorflow as tf
-
-import oneflow as flow
-import oneflow.typing as tp
-from oneflow_onnx.x2oneflow.handler import oneflow_code_gen, oneflow_blobname_map
-from oneflow_onnx.x2oneflow.onnx2flow import from_onnx, from_pytorch, from_paddle, from_tensorflow2
-
-def oneflow_code_gen_func(input_size, model_weight_save_dir):
-    oneflow_python_file = "/tmp/oneflow_code.py"
-    onnx_file = '/tmp/simp.onnx'
-    f = open(oneflow_python_file, 'w')
-
-    f.write('import oneflow as flow\n')
-    f.write('import oneflow.typing as tp\n')
-    f.write('import numpy as np\n')
-    f.write('import onnxruntime as ort\n')
-    f.write('from collections import OrderedDict\n\n')
-
-    f.write('@flow.global_function(type="predict")\n')
-    f.write('def eval_job(\n')
-    f.write('    x_0: tp.Numpy.Placeholder(({}, {}, {}, {}), dtype=flow.float)\n'.format(input_size[0], input_size[1], input_size[2], input_size[3]))
-    f.write(') -> tp.Numpy:\n')
-    f.write('    with flow.scope.placement("gpu", "0:0"):\n')
-
-    for x in oneflow_code_gen:
-        f.write('        {}'.format(x))
-
-    res = oneflow_code_gen[len(oneflow_code_gen)-1].split()[0]
-    f.write('    return {}'.format(res))
-
-    f.write('\n\n')
-
-    f.write('def main():\n')
-    f.write('    x = np.random.uniform(low=0.0, high=1.0, size=({}, {}, {}, {})).astype(np.float32)\n'.format(input_size[0], input_size[1], input_size[2], input_size[3]))
-    f.write('    flow.train.CheckPoint().load({})\n'.format("'"+model_weight_save_dir+"'"))
-    f.write('    oneflow_res = eval_job(x)\n\n')
-    f.write('    ort_sess_opt = ort.SessionOptions()\n')
-    f.write('    ort_sess_opt.graph_optimization_level = (\n')
-    f.write('        ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED\n')
-    f.write('    )\n\n')
-    f.write('    sess = ort.InferenceSession({}, sess_options=ort_sess_opt)\n'.format("'"+onnx_file+"'"))
-    f.write('    assert len(sess.get_outputs()) == 1\n')
-    f.write('    assert len(sess.get_inputs()) <= 1\n')
-    f.write('    ipt_dict = OrderedDict()\n')
-    f.write('    for ipt in sess.get_inputs():\n')
-    f.write('        ipt_dict[ipt.name] = x\n\n')
-    f.write('    onnx_res = sess.run([], ipt_dict)[0]\n')
-    f.write('    rtol, atol = 1e-2, 1e-5\n')
-    f.write('    a = onnx_res.flatten()\n')
-    f.write('    b = oneflow_res.flatten()\n')
-    f.write('    for i in range(len(a)):\n')
-    f.write('        if np.abs(a[i] - b[i]) > atol + rtol * np.abs(b[i]):\n')
-    f.write('            print("a[{}]={}, b[{}]={}".format(i, a[i], i, b[i]))\n\n')
-    f.write('    assert np.allclose(onnx_res, oneflow_res, rtol=rtol, atol=atol)\n\n')
-
-    f.write('if __name__ == "__main__":\n')
-    f.write('    main()\n')
-    f.close()
-
-def load_pytorch_module_and_check(
-    pt_module_class,
-    input_size=None,
-    input_min_val=0.0,
-    input_max_val=1.0,
-    train_flag=False,
-    flow_weight_dir="/tmp/oneflow",
-    oneflow_code_gen_flag=False,
-):
-    if input_size is None:
-        input_size = (2, 4, 3, 5)
-    pt_module = pt_module_class()
-
-    model_weight_save_dir = flow_weight_dir
-
-    if train_flag == True:
-
-        @flow.global_function(type="train")
-        def job_train(x: tp.Numpy.Placeholder(input_size)) -> tp.Numpy:
-            x += flow.get_variable(
-                name="trick",
-                shape=(1,),
-                dtype=flow.float,
-                initializer=flow.zeros_initializer(),
-            )
-
-            y = from_pytorch(
-                pt_module,
-                x,
-                model_weight_dir=model_weight_save_dir,
-                do_onnxsim=True,
-                train_flag=train_flag,
-            )
-            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0])
-            flow.optimizer.SGD(lr_scheduler).minimize(y)
-            return y
-
-    else:
-
-        @flow.global_function(type="predict")
-        def job_eval(x: tp.Numpy.Placeholder(input_size)) -> tp.Numpy:
-            x += flow.get_variable(
-                name="trick",
-                shape=(1,),
-                dtype=flow.float,
-                initializer=flow.zeros_initializer(),
-            )
-
-            y = from_pytorch(
-                pt_module,
-                x,
-                model_weight_dir=model_weight_save_dir,
-                do_onnxsim=True,
-                train_flag=train_flag,
-            )
-            return y
-
-    flow.train.CheckPoint().load(model_weight_save_dir)
-    # flow.load_variables(flow.checkpoint.get(model_weight_save_dir))
-
-    if oneflow_code_gen_flag == True and len(input_size) == 4:
-        oneflow_code_gen_func(input_size, model_weight_save_dir)
-        flow.clear_default_session()
-        return
-
-    if train_flag == False:
-        pt_module.eval()
-
-    ipt1 = np.random.uniform(
-        low=input_min_val, high=input_max_val, size=input_size
-    ).astype(np.float32)
-
-    if train_flag == True:
-        flow_res = job_train(ipt1)
-    else:
-        flow_res = job_eval(ipt1)
-    pytorch_res = pt_module(torch.tensor(ipt1).to("cpu")).detach().numpy()
-    print(flow_res)
-    print("-------------")
-    print(pytorch_res)
-
-    a, b = flow_res.flatten(), pytorch_res.flatten()
-
-    max_idx = np.argmax(np.abs(a - b) / (a + 1e-7))
-    print(
-        "max rel diff is {} at index {}".format(
-            np.max(np.abs(a - b) / (a + 1e-7)), max_idx
-        )
-    )
-    print("a[{}]={}, b[{}]={}".format(max_idx, a[max_idx], max_idx, b[max_idx]))
-    flow.clear_default_session()
-
-
-def load_paddle_module_and_check(
-    pd_module_class,
-    input_size=None,
-    input_min_val=0.0,
-    input_max_val=1.0,
-    train_flag=False,
-    flow_weight_dir="/tmp/oneflow",
-    oneflow_code_gen_flag=False,
-):
-    if input_size is None:
-        input_size = (2, 4, 3, 5)
-    pd_module = pd_module_class()
-
-    model_weight_save_dir = flow_weight_dir
-
-    if train_flag == True:
-
-        @flow.global_function(type="train")
-        def job_train(x: tp.Numpy.Placeholder(input_size)) -> tp.Numpy:
-            x += flow.get_variable(
-                name="trick",
-                shape=(1,),
-                dtype=flow.float,
-                initializer=flow.zeros_initializer(),
-            )
-
-            y = from_paddle(
-                pd_module,
-                x,
-                model_weight_dir=model_weight_save_dir,
-                do_onnxsim=True,
-                train_flag=train_flag,
-            )
-            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0])
-            flow.optimizer.SGD(lr_scheduler).minimize(y)
-            return y
-
-    else:
-
-        @flow.global_function(type="predict")
-        def job_eval(x: tp.Numpy.Placeholder(input_size)) -> tp.Numpy:
-            x += flow.get_variable(
-                name="trick",
-                shape=(1,),
-                dtype=flow.float,
-                initializer=flow.zeros_initializer(),
-            )
-
-            y = from_paddle(
-                pd_module,
-                x,
-                model_weight_dir=model_weight_save_dir,
-                do_onnxsim=True,
-                train_flag=train_flag,
-            )
-            return y
-
-    flow.train.CheckPoint().load(model_weight_save_dir)
-
-    if oneflow_code_gen_flag == True and len(input_size) == 4:
-        oneflow_code_gen_func(input_size, model_weight_save_dir)
-        flow.clear_default_session()
-        return
-
-    if train_flag == False:
-        pd_module.eval()
-
-    ipt1 = np.random.uniform(
-        low=input_min_val, high=input_max_val, size=input_size
-    ).astype(np.float32)
-    if train_flag == True:
-        flow_res = job_train(ipt1)
-    else:
-        flow_res = job_eval(ipt1)
-    paddle_res = pd_module(paddle.to_tensor(ipt1)).numpy()
-    print(flow_res)
-    print("-------------")
-    print(paddle_res)
-
-    a, b = flow_res.flatten(), paddle_res.flatten()
-
-    max_idx = np.argmax(np.abs(a - b) / (a + 1e-7))
-    print(
-        "max rel diff is {} at index {}".format(
-            np.max(np.abs(a - b) / (a + 1e-7)), max_idx
-        )
-    )
-    print("a[{}]={}, b[{}]={}".format(max_idx, a[max_idx], max_idx, b[max_idx]))
-    flow.clear_default_session()
-
-
-def load_tensorflow2_module_and_check(
-    tf_module_class,
-    input_size=None,
-    input_min_val=0.0,
-    input_max_val=1.0,
-    train_flag=False,
-    flow_weight_dir="/tmp/oneflow",
-    oneflow_code_gen_flag=False,
-):
-    if input_size is None:
-        input_size = (2, 4, 3, 5)
-    tf_module = tf_module_class()
-
-    # flow.config.enable_debug_mode(True)
-
-    model_weight_save_dir = flow_weight_dir
-
-    if train_flag == True:
-
-        @flow.global_function(type="train")
-        def job_train(x: tp.Numpy.Placeholder(input_size)) -> tp.Numpy:
-            x += flow.get_variable(
-                name="trick",
-                shape=(1,),
-                dtype=flow.float,
-                initializer=flow.zeros_initializer(),
-            )
-
-            y = from_tensorflow2(
-                tf_module,
-                x,
-                model_weight_dir=model_weight_save_dir,
-                do_onnxsim=True,
-                train_flag=train_flag,
-            )
-            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0])
-            flow.optimizer.SGD(lr_scheduler).minimize(y)
-            return y
-
-    else:
-
-        @flow.global_function(type="predict")
-        def job_eval(x: tp.Numpy.Placeholder(input_size)) -> tp.Numpy:
-            x += flow.get_variable(
-                name="trick",
-                shape=(1,),
-                dtype=flow.float,
-                initializer=flow.zeros_initializer(),
-            )
-
-            y = from_tensorflow2(
-                tf_module,
-                x,
-                model_weight_dir=model_weight_save_dir,
-                do_onnxsim=True,
-                train_flag=train_flag,
-            )
-            return y
-
-    flow.train.CheckPoint().load(model_weight_save_dir)
-
-    ipt1 = np.random.uniform(
-        low=input_min_val, high=input_max_val, size=input_size
-    ).astype(np.float32)
-
-    if oneflow_code_gen_flag == True and len(input_size) == 4:
-        oneflow_code_gen_func(input_size, model_weight_save_dir)
-        flow.clear_default_session()
-        return
-
-    if train_flag == True:
-        flow_res = job_train(ipt1)
-    else:
-        flow_res = job_eval(ipt1)
-
-    tf_input = tf.constant(ipt1, dtype=tf.float32)
-    tensorflow_res = tf_module.predict(tf_input)
-    if type(tensorflow_res) is not list:
-        tensorflow_res = np.array(tensorflow_res)
-
-    print(flow_res)
-    print("-------------")
-    print(tensorflow_res)
-
-    a, b = flow_res.flatten(), tensorflow_res.flatten()
-
-    max_idx = np.argmax(np.abs(a - b) / (a + 1e-7))
-    print(
-        "max rel diff is {} at index {}".format(
-            np.max(np.abs(a - b) / (a + 1e-7)), max_idx
-        )
-    )
-    print("a[{}]={}, b[{}]={}".format(max_idx, a[max_idx], max_idx, b[max_idx]))
-    flow.clear_default_session()
-
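All three `load_*_module_and_check` helpers above follow one recipe: wrap the `from_*` conversion in a oneflow job, feed the same random input to both frameworks, and report the largest relative difference. A typical call looked like this (`torchvision` is used here purely as an example source of a module class):

```python
import torchvision.models as models

# Convert torchvision's AlexNet through ONNX into oneflow and compare outputs.
load_pytorch_module_and_check(
    models.alexnet,
    input_size=(1, 3, 224, 224),
    train_flag=False,
    flow_weight_dir="/tmp/oneflow",
    oneflow_code_gen_flag=False,  # True would emit /tmp/oneflow_code.py instead of comparing
)
```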
diff --git a/save_serving_tool/README.md b/save_serving_tool/README.md
deleted file mode 100644
index 8828cabc6515857ba31ee19f2e740fe92776bfc8..0000000000000000000000000000000000000000
--- a/save_serving_tool/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# save_serving_tool
-
-Using ResNet50 as an example, this tool shows how to save a model trained with OneFlow as a model usable on the OneFlow Serving side.
-
-- resnet50.py is the OneFlow model-building code for ResNet50.
-- save_model.py is the code that loads ResNet50 and converts it into a Serving-side model; to convert a different model, substitute the model name accordingly.
-- save_model.sh collects the commands for the ResNet50 conversion into a Bash script; adjust the parameters to quickly save a Serving-side model.
-
-To convert any other custom model, adapt this example accordingly.
diff --git a/save_serving_tool/resnet50_model.py b/save_serving_tool/resnet50_model.py
deleted file mode 100644
index 3cba485e6641107d889241de4690f143f39b7413..0000000000000000000000000000000000000000
--- a/save_serving_tool/resnet50_model.py
+++ /dev/null
@@ -1,233 +0,0 @@
-import oneflow as flow
-
-BLOCK_COUNTS = [3, 4, 6, 3]
-BLOCK_FILTERS = [256, 512, 1024, 2048]
-BLOCK_FILTERS_INNER = [64, 128, 256, 512]
-
-
-class ResnetBuilder(object):
-    def __init__(
-        self,
-        weight_regularizer,
-        trainable=True,
-        training=True,
-        channel_last=False,
-        fuse_bn_relu=True,
-        fuse_bn_add_relu=True,
-    ):
-        self.data_format = "NHWC" if channel_last else "NCHW"
-        self.weight_initializer = flow.variance_scaling_initializer(
-            2, "fan_in", "random_normal", data_format=self.data_format
-        )
-        self.weight_regularizer = weight_regularizer
-        self.trainable = trainable
-        self.training = training
-        self.fuse_bn_relu = fuse_bn_relu
-        self.fuse_bn_add_relu = fuse_bn_add_relu
-
-    def _conv2d(
-        self, name, input, filters, kernel_size, strides=1, padding="SAME", dilations=1,
-    ):
-        # There are different shapes of the weight matrix between 'NCHW' and 'NHWC' mode
-        if self.data_format == "NHWC":
-            shape = (filters, kernel_size, kernel_size, input.shape[3])
-        else:
-            shape = (filters, input.shape[1], kernel_size, kernel_size)
-        weight = flow.get_variable(
-            name + "-weight",
-            shape=shape,
-            dtype=input.dtype,
-            initializer=self.weight_initializer,
-            regularizer=self.weight_regularizer,
-            model_name="weight",
-            trainable=self.trainable,
-        )
-
-        return flow.nn.conv2d(
-            input, weight, strides, padding, self.data_format, dilations, name=name
-        )
-
-    def _batch_norm(self, inputs, name=None, last=False):
-        initializer = flow.zeros_initializer() if last else flow.ones_initializer()
-        axis = 1
-        if self.data_format == "NHWC":
-            axis = 3
-        return flow.layers.batch_normalization(
-            inputs=inputs,
-            axis=axis,
-            momentum=0.9,  # 97,
-            epsilon=1e-5,
-            center=True,
-            scale=True,
-            trainable=self.trainable,
-            training=self.training,
-            gamma_initializer=initializer,
-            moving_variance_initializer=initializer,
-            gamma_regularizer=self.weight_regularizer,
-            beta_regularizer=self.weight_regularizer,
-            name=name,
-        )
-
-    def _batch_norm_relu(self, inputs, name=None, last=False):
-        if self.fuse_bn_relu:
-            initializer = flow.zeros_initializer() if last else flow.ones_initializer()
-            axis = 1
-            if self.data_format == "NHWC":
-                axis = 3
-            return flow.layers.batch_normalization_relu(
-                inputs=inputs,
-                axis=axis,
-                momentum=0.9,
-                epsilon=1e-5,
-                center=True,
-                scale=True,
-                trainable=self.trainable,
-                training=self.training,
-                gamma_initializer=initializer,
-                moving_variance_initializer=initializer,
-                gamma_regularizer=self.weight_regularizer,
-                beta_regularizer=self.weight_regularizer,
-                name=name + "_bn_relu",
-            )
-        else:
-            return flow.nn.relu(self._batch_norm(inputs, name + "_bn", last=last))
-
-    def _batch_norm_add_relu(self, inputs, addend, name=None, last=False):
-        if self.fuse_bn_add_relu:
-            initializer = flow.zeros_initializer() if last else flow.ones_initializer()
-            axis = 1
-            if self.data_format == "NHWC":
-                axis = 3
-            return flow.layers.batch_normalization_add_relu(
-                inputs=inputs,
-                addend=addend,
-                axis=axis,
-                momentum=0.9,
-                epsilon=1e-5,
-                center=True,
-                scale=True,
-                trainable=self.trainable,
-                training=self.training,
-                gamma_initializer=initializer,
-                moving_variance_initializer=initializer,
-                gamma_regularizer=self.weight_regularizer,
-                beta_regularizer=self.weight_regularizer,
-                name=name + "_bn_add_relu",
-            )
-        else:
-            return flow.nn.relu(
-                self._batch_norm(inputs, name + "_bn", last=last) + addend
-            )
-
-    def conv2d_affine(self, input, name, filters, kernel_size, strides):
-        # input data_format must be NCHW, cannot check now
-        padding = "SAME" if strides > 1 or kernel_size > 1 else "VALID"
-        output = self._conv2d(name, input, filters, kernel_size, strides, padding)
-        return output
-
-    def bottleneck_transformation(
-        self, input, block_name, filters, filters_inner, strides
-    ):
-        a = self.conv2d_affine(input, block_name + "_branch2a", filters_inner, 1, 1)
-        a = self._batch_norm_relu(a, block_name + "_branch2a")
-
-        b = self.conv2d_affine(a, block_name + "_branch2b", filters_inner, 3, strides)
-        b = self._batch_norm_relu(b, block_name + "_branch2b")
-
-        c = self.conv2d_affine(b, block_name + "_branch2c", filters, 1, 1)
-        return c
-
-    def residual_block(self, input, block_name, filters, filters_inner, strides_init):
-        if strides_init != 1 or block_name == "res2_0":
-            shortcut = self.conv2d_affine(
-                input, block_name + "_branch1", filters, 1, strides_init
-            )
-            shortcut = self._batch_norm(shortcut, block_name + "_branch1_bn")
-        else:
-            shortcut = input
-
-        bottleneck = self.bottleneck_transformation(
-            input, block_name, filters, filters_inner, strides_init,
-        )
-        return self._batch_norm_add_relu(
-            bottleneck, shortcut, block_name + "_branch2c", last=True
-        )
-
-    def residual_stage(
-        self, input, stage_name, counts, filters, filters_inner, stride_init=2
-    ):
-        output = input
-        for i in range(counts):
-            block_name = "%s_%d" % (stage_name, i)
-            output = self.residual_block(
-                output, block_name, filters, filters_inner, stride_init if i == 0 else 1
-            )
-
-        return output
-
-    def resnet_conv_x_body(self, input):
-        output = input
-        for i, (counts, filters, filters_inner) in enumerate(
-            zip(BLOCK_COUNTS, BLOCK_FILTERS, BLOCK_FILTERS_INNER)
-        ):
-            stage_name = "res%d" % (i + 2)
-            output = self.residual_stage(
-                output, stage_name, counts, filters, filters_inner, 1 if i == 0 else 2
-            )
-        return output
-
-    def resnet_stem(self, input):
-        conv1 = self._conv2d("conv1", input, 64, 7, 2)
-        conv1_bn = self._batch_norm_relu(conv1, "conv1")
-        pool1 = flow.nn.max_pool2d(
-            conv1_bn,
-            ksize=3,
-            strides=2,
-            padding="SAME",
-            data_format=self.data_format,
-            name="pool1",
-        )
-        return pool1
-
-
-def resnet50(images, args, trainable=True, training=True):
-    weight_regularizer = None
-    builder = ResnetBuilder(
-        weight_regularizer,
-        trainable,
-        training,
-        args.channel_last,
-        args.fuse_bn_relu,
-        args.fuse_bn_add_relu,
-    )
-    if args.pad_output:
-        if args.channel_last:
-            paddings = ((0, 0), (0, 0), (0, 0), (0, 1))
-        else:
-            paddings = ((0, 0), (0, 1), (0, 0), (0, 0))
-        images = flow.pad(images, paddings=paddings)
-    with flow.scope.namespace("Resnet"):
-        stem = builder.resnet_stem(images)
-        body = builder.resnet_conv_x_body(stem)
-        pool5 = flow.nn.avg_pool2d(
-            body,
-            ksize=7,
-            strides=1,
-            padding="VALID",
-            data_format=builder.data_format,
-            name="pool5",
-        )
-        fc1001 = flow.layers.dense(
-            flow.reshape(pool5, (pool5.shape[0], -1)),
-            units=1000,
-            use_bias=True,
-            kernel_initializer=flow.variance_scaling_initializer(
-                2, "fan_in", "random_normal"
-            ),
-            bias_initializer=flow.zeros_initializer(),
-            kernel_regularizer=weight_regularizer,
-            bias_regularizer=weight_regularizer,
-            trainable=trainable,
-            name="fc1001",
-        )
-    return fc1001
diff --git a/save_serving_tool/save_model.py b/save_serving_tool/save_model.py
deleted file mode 100644
index 4e9fb7f319fc0463a3156687bcd7d08cf52573bb..0000000000000000000000000000000000000000
--- a/save_serving_tool/save_model.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import argparse
-import os
-import shutil
-
-import oneflow as flow
-import oneflow.typing as tp
-
-from resnet50_model import resnet50
-
-def _init_oneflow_env_and_config():
-    flow.env.init()
-    flow.enable_eager_execution(False)
-    flow.config.enable_legacy_model_io(True)
-
-def _make_resnet50_predict_func(args):
-    batch_size = 1
-    channels = 3
-
-    func_cfg = flow.function_config()
-    func_cfg.default_placement_scope(flow.scope.placement("cpu", "0:0"))
-
-    @flow.global_function("predict", function_config=func_cfg)
-    def predict_fn(
-        images: tp.Numpy.Placeholder((1, args.image_height, args.image_width, channels), dtype=flow.float)
-    ) -> tp.Numpy:
-        logits = resnet50(images, args, training=False)
-        predictions = flow.nn.softmax(logits)
-        return predictions
-
-    return predict_fn
-
-
-def main(args):
-    _init_oneflow_env_and_config()
-
-    predict_fn = _make_resnet50_predict_func(args)
-    flow.train.CheckPoint().load(args.model_dir)
-    print("predict_fn construct finished")
-
-    saved_model_path = args.save_dir
-    model_version = args.model_version
-
-    model_version_path = os.path.join(saved_model_path, str(model_version))
-    if os.path.exists(model_version_path) and os.path.isdir(model_version_path):
-        if args.force_save:
-            print(
-                f"WARNING: The model version path '{model_version_path}' already exist"
-                ", old version directory will be replaced"
-            )
-            shutil.rmtree(model_version_path)
-        else:
-            raise ValueError(
-                f"The model version path '{model_version_path}' already exist"
-            )
-
-    saved_model_builder = (
-        flow.saved_model.ModelBuilder(saved_model_path)
-        .ModelName(args.model_name)
-        .Version(model_version)
-    )
-    saved_model_builder.AddFunction(predict_fn).Finish()
-    saved_model_builder.Save()
-
-
-def _parse_args():
-    def str2bool(v):
-        if v.lower() in ("yes", "true", "t", "y", "1"):
-            return True
-        elif v.lower() in ("no", "false", "f", "n", "0"):
-            return False
-        else:
-            raise argparse.ArgumentTypeError("Unsupported value encountered.")
-
-    parser = argparse.ArgumentParser("flags for save resnet50 model")
-    parser.add_argument(
-        "--model_dir",
-        type=str,
-        default="resnet50_nhwc",
-        help="model parameters directory",
-    )
-    parser.add_argument(
-        "--save_dir",
-        type=str,
-        default="resnet50_models",
-        help="directory to save models",
-    )
-    parser.add_argument(
-        "--model_name", type=str, default="resnet50", help="model name"
-    )
-    parser.add_argument("--model_version", type=int, default=1, help="model version")
-    parser.add_argument(
-        "--force_save",
-        default=False,
-        action="store_true",
-        help="force save model whether it already exists or not",
-    )
-    parser.add_argument(
-        "--image_width", type=int, default=224, help="input image width"
-    )
-    parser.add_argument(
-        "--image_height", type=int, default=224, help="input image height"
-    )
-    parser.add_argument(
-        "--channel_last",
-        type=str2bool,
-        default=True,
-        help="Whether to use channel last mode (NHWC)",
-    )
-    # fuse bn relu or bn add relu
-    parser.add_argument(
-        "--fuse_bn_relu",
-        type=str2bool,
-        default=False,
-        help="Whether to use fused batch normalization relu. Currently supported in origin/master of OneFlow only.",
-    )
-    parser.add_argument(
-        "--fuse_bn_add_relu",
-        type=str2bool,
-        default=False,
-        help="Whether to use fused batch normalization add relu. Currently supported in origin/master of OneFlow only.",
-    )
-    parser.add_argument(
-        "--pad_output",
-        type=str2bool,
-        nargs="?",
-        const=True,
-        help="Whether to pad the number of output image channels to 4.",
-    )
-    return parser.parse_args()
-
-
-if __name__ == "__main__":
-    args = _parse_args()
-    main(args)
diff --git a/save_serving_tool/save_model.sh b/save_serving_tool/save_model.sh
deleted file mode 100644
index 5f9bc965a184453b89d06bc7b2ad640334b00676..0000000000000000000000000000000000000000
--- a/save_serving_tool/save_model.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-set -ex
-
-# download model parameters for first-time
-# wget https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/cpu/resnet50.tar.gz
-# tar zxvf resnet50.tar.gz
-
-base_dir=`dirname $0`
-
-python3 $base_dir/save_model.py \
-    --model_dir resnet50_nhwc \
-    --save_dir resnet50_models \
-    --model_version 1 \
-    --force_save