Commit ab36ee44 authored by BBuf

add oneflow_onnx source

Parent 9ae78213
...@@ -25,9 +25,10 @@ onnx>=1.8.0
onnx-simplifier>=0.3.3
onnxoptimizer>=0.2.5
onnxruntime>=1.6.0
oneflow (https://github.com/Oneflow-Inc/oneflow#install-with-pip-package)
```
If you want to use X2OneFlow (where X stands for TensorFlow/Pytorch/PaddlePaddle), you also need to install the corresponding deep learning framework. The dependencies are as follows:
```sh
......
# X2OneFlow Code Generation
> This page tracks which models support OneFlow code generation in X2OneFlow.
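
As a minimal sketch of how code generation is driven (mirroring the tests added in this commit; `/tmp/oneflow` is just a scratch directory), passing `oneflow_code_gen_flag=True` to the conversion helper emits OneFlow model code alongside the numerical check:

```python
import torchvision

from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check

# Convert torchvision's ResNet18, check its outputs against the OneFlow
# version, and generate equivalent OneFlow model code.
load_pytorch_module_and_check(
    torchvision.models.resnet18,
    input_size=(1, 3, 224, 224),
    train_flag=False,
    flow_weight_dir="/tmp/oneflow",
    oneflow_code_gen_flag=True,
)
```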
## Pytorch
| Model | Supported |
| ------------ | -------- |
| LeNet | Yes |
| AlexNet | Yes |
| VGGNet | Yes |
| GoogleNet | Yes |
| ResNet | Yes |
| PreActResNet | Yes |
| ResNext | |
| SENet | |
| MobileNetV1 | |
| MobileNetV2 | |
| MobileNetV3 | |
| RegNet | |
| DenseNet | |
| EfficientNet | |
| InceptionNet | |
| ShuffleNetV1 | |
| ShuffleNetV2 | |
| SqueezeNet | |
| DPN | |
| PNASNet | |
| DLANet | |
## TensorFlow
| Model | Supported |
| ------------ | -------- |
| VGGNet | |
| ResNet | |
| ResNetV2 | |
| XceptionNet | |
| MobileNetV1 | |
| MobileNetV2 | |
| MobileNetV3 | |
| DenseNet | |
| EfficientNet | |
| InceptionNet | |
## PaddlePaddle
| Model | Supported |
| ------------------ | -------- |
| AlexNet | |
| VGGNet | |
| GoogleNet | |
| ResNet | |
| ResNext | |
| SE_ResNext | |
| SENet | |
| MobileNetV1 | |
| MobileNetV2 | |
| MobileNetV3 | |
| RegNet | |
| DenseNet | |
| EfficientNet | |
| InceptionNet | |
| ShuffleNetV2 | |
| SqueezeNet | |
| DPNNet | |
| DarkNet | |
| GhostNet | |
| RepVGG | |
| XceptionNet | |
| Xception_DeepLab | |
| Vision_Transformer | |
| Res2Net | |
\ No newline at end of file
...@@ -4,7 +4,7 @@
## Pytorch
| Model | Supported |
| ------------ | -------- |
| LeNet | Yes |
| AlexNet | Yes |
...@@ -30,7 +30,7 @@
## TensorFlow
| Model | Supported |
| ------------ | -------- |
| VGGNet | Yes |
| ResNet | Yes |
...@@ -45,32 +45,32 @@
## PaddlePaddle
| Model | Supported |
| ------------------ | -------- |
| AlexNet | Yes |
| VGGNet | Yes |
| GoogleNet | Yes |
| ResNet | Yes |
| ResNext | Yes |
| SE_ResNext | Yes |
| SENet | Yes |
| MobileNetV1 | Yes |
| MobileNetV2 | Yes |
| MobileNetV3 | Yes |
| RegNet | Yes |
| DenseNet | Yes |
| EfficientNet | Yes |
| InceptionNet | Yes |
| ShuffleNetV2 | Yes |
| SqueezeNet | Yes |
| DPNNet | Yes |
| DarkNet | Yes |
| GhostNet | Yes |
| RepVGG | Yes |
| XceptionNet | Yes |
| Xception_DeepLab | Yes |
| Vision_Transformer | Yes |
| Res2Net | Yes |
- Test code for every model listed above can be found in this project's examples directory
\ No newline at end of file
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
def test_alexnet():
load_pytorch_module_and_check(
torchvision.models.alexnet,
input_size=(1, 3, 224, 224),
train_flag=False,
flow_weight_dir="/tmp/oneflow",
oneflow_code_gen_flag=True,
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
"""DenseNet in PyTorch."""
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
# https://github.com/kuangliu/pytorch-cifar/blob/master/models/densenet.py
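# Dense connectivity: each Bottleneck produces growth_rate new feature maps and
# concatenates them onto its input, so the channel count grows linearly with depth.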
class Bottleneck(nn.Module):
def __init__(self, in_planes, growth_rate):
super(Bottleneck, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, 4 * growth_rate, kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(4 * growth_rate)
self.conv2 = nn.Conv2d(
4 * growth_rate, growth_rate, kernel_size=3, padding=1, bias=False
)
def forward(self, x):
out = self.conv1(F.relu(self.bn1(x)))
out = self.conv2(F.relu(self.bn2(out)))
out = torch.cat([out, x], 1)
return out
class Transition(nn.Module):
def __init__(self, in_planes, out_planes):
super(Transition, self).__init__()
self.bn = nn.BatchNorm2d(in_planes)
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
def forward(self, x):
out = self.conv(F.relu(self.bn(x)))
out = F.avg_pool2d(out, 2)
return out
class DenseNet(nn.Module):
def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
super(DenseNet, self).__init__()
self.growth_rate = growth_rate
num_planes = 2 * growth_rate
self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
num_planes += nblocks[0] * growth_rate
out_planes = int(math.floor(num_planes * reduction))
self.trans1 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
num_planes += nblocks[1] * growth_rate
out_planes = int(math.floor(num_planes * reduction))
self.trans2 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
num_planes += nblocks[2] * growth_rate
out_planes = int(math.floor(num_planes * reduction))
self.trans3 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
num_planes += nblocks[3] * growth_rate
self.bn = nn.BatchNorm2d(num_planes)
self.linear = nn.Linear(num_planes, num_classes)
def _make_dense_layers(self, block, in_planes, nblock):
layers = []
for i in range(nblock):
layers.append(block(in_planes, self.growth_rate))
in_planes += self.growth_rate
return nn.Sequential(*layers)
def forward(self, x):
out = self.conv1(x)
out = self.trans1(self.dense1(out))
out = self.trans2(self.dense2(out))
out = self.trans3(self.dense3(out))
out = self.dense4(out)
out = F.avg_pool2d(F.relu(self.bn(out)), 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def DenseNet121():
return DenseNet(Bottleneck, [6, 12, 24, 16], growth_rate=32)
def DenseNet169():
return DenseNet(Bottleneck, [6, 12, 32, 32], growth_rate=32)
def DenseNet201():
return DenseNet(Bottleneck, [6, 12, 48, 32], growth_rate=32)
def DenseNet161():
return DenseNet(Bottleneck, [6, 12, 36, 24], growth_rate=48)
def densenet_cifar():
return DenseNet(Bottleneck, [6, 12, 24, 16], growth_rate=12)
def test_densenet():
load_pytorch_module_and_check(
densenet_cifar, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
'''DLA in PyTorch.
Reference:
Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class Root(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=1):
super(Root, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, kernel_size,
stride=1, padding=(kernel_size - 1) // 2, bias=False)
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, xs):
x = torch.cat(xs, 1)
out = F.relu(self.bn(self.conv(x)))
return out
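# Tree implements DLA's recursive aggregation: `level` stages are stacked, every
# stage's output is kept, and Root fuses them with a channel concat plus 1x1 conv.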
class Tree(nn.Module):
def __init__(self, block, in_channels, out_channels, level=1, stride=1):
super(Tree, self).__init__()
self.level = level
if level == 1:
self.root = Root(2*out_channels, out_channels)
self.left_node = block(in_channels, out_channels, stride=stride)
self.right_node = block(out_channels, out_channels, stride=1)
else:
self.root = Root((level+2)*out_channels, out_channels)
for i in reversed(range(1, level)):
subtree = Tree(block, in_channels, out_channels,
level=i, stride=stride)
self.__setattr__('level_%d' % i, subtree)
self.prev_root = block(in_channels, out_channels, stride=stride)
self.left_node = block(out_channels, out_channels, stride=1)
self.right_node = block(out_channels, out_channels, stride=1)
def forward(self, x):
xs = [self.prev_root(x)] if self.level > 1 else []
for i in reversed(range(1, self.level)):
level_i = self.__getattr__('level_%d' % i)
x = level_i(x)
xs.append(x)
x = self.left_node(x)
xs.append(x)
x = self.right_node(x)
xs.append(x)
out = self.root(xs)
return out
class DLA(nn.Module):
def __init__(self, block=BasicBlock, num_classes=10):
super(DLA, self).__init__()
self.base = nn.Sequential(
nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(16),
nn.ReLU(True)
)
self.layer1 = nn.Sequential(
nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(16),
nn.ReLU(True)
)
self.layer2 = nn.Sequential(
nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(32),
nn.ReLU(True)
)
self.layer3 = Tree(block, 32, 64, level=1, stride=1)
self.layer4 = Tree(block, 64, 128, level=2, stride=2)
self.layer5 = Tree(block, 128, 256, level=2, stride=2)
self.layer6 = Tree(block, 256, 512, level=1, stride=2)
self.linear = nn.Linear(512, num_classes)
def forward(self, x):
out = self.base(x)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = self.layer5(out)
out = self.layer6(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def test_dlanet():
load_pytorch_module_and_check(
DLA, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
'''Dual Path Networks in PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Bottleneck(nn.Module):
def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
super(Bottleneck, self).__init__()
self.out_planes = out_planes
self.dense_depth = dense_depth
self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
self.bn2 = nn.BatchNorm2d(in_planes)
self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(out_planes+dense_depth)
self.shortcut = nn.Sequential()
if first_layer:
self.shortcut = nn.Sequential(
nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_planes+dense_depth)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
x = self.shortcut(x)
d = self.out_planes
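        # Dual path: the first d channels are summed (ResNet-style residual path),
        # while the remaining channels are concatenated (DenseNet-style dense path).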
out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1)
out = F.relu(out)
return out
class DPN(nn.Module):
def __init__(self, cfg):
super(DPN, self).__init__()
in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.last_planes = 64
self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10)
def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for i,stride in enumerate(strides):
layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0))
self.last_planes = out_planes + (i+2) * dense_depth
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def DPN26():
cfg = {
'in_planes': (96,192,384,768),
'out_planes': (256,512,1024,2048),
'num_blocks': (2,2,2,2),
'dense_depth': (16,32,24,128)
}
return DPN(cfg)
def DPN92():
cfg = {
'in_planes': (96,192,384,768),
'out_planes': (256,512,1024,2048),
'num_blocks': (3,4,20,3),
'dense_depth': (16,32,24,128)
}
return DPN(cfg)
def test_dpn():
load_pytorch_module_and_check(
DPN26, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
import torch
import torch.nn as nn
import torch.nn.functional as F
# https://github.com/kuangliu/pytorch-cifar/blob/master/models/efficientnet.py
def swish(x):
return x * x.sigmoid()
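# DropConnect / stochastic depth: zero out whole samples with probability
# drop_ratio and rescale the survivors by 1/keep_ratio so the expected
# activation is unchanged at train time.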
def drop_connect(x, drop_ratio):
keep_ratio = 1.0 - drop_ratio
mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
mask.bernoulli_(keep_ratio)
x.div_(keep_ratio)
x.mul_(mask)
return x
class SE(nn.Module):
"""Squeeze-and-Excitation block with Swish."""
def __init__(self, in_channels, se_channels):
super(SE, self).__init__()
self.se1 = nn.Conv2d(in_channels, se_channels, kernel_size=1, bias=True)
self.se2 = nn.Conv2d(se_channels, in_channels, kernel_size=1, bias=True)
def forward(self, x):
out = F.adaptive_avg_pool2d(x, (1, 1))
out = swish(self.se1(out))
out = self.se2(out).sigmoid()
out = x * out
return out
class Block(nn.Module):
"""expansion + depthwise + pointwise + squeeze-excitation"""
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride,
expand_ratio=1,
se_ratio=0.0,
drop_rate=0.0,
):
super(Block, self).__init__()
self.stride = stride
self.drop_rate = drop_rate
self.expand_ratio = expand_ratio
# Expansion
channels = expand_ratio * in_channels
self.conv1 = nn.Conv2d(
in_channels, channels, kernel_size=1, stride=1, padding=0, bias=False
)
self.bn1 = nn.BatchNorm2d(channels)
# Depthwise conv
self.conv2 = nn.Conv2d(
channels,
channels,
kernel_size=kernel_size,
stride=stride,
padding=(1 if kernel_size == 3 else 2),
groups=channels,
bias=False,
)
self.bn2 = nn.BatchNorm2d(channels)
# SE layers
se_channels = int(in_channels * se_ratio)
self.se = SE(channels, se_channels)
# Output
self.conv3 = nn.Conv2d(
channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False
)
self.bn3 = nn.BatchNorm2d(out_channels)
# Skip connection if in and out shapes are the same (MV-V2 style)
self.has_skip = (stride == 1) and (in_channels == out_channels)
def forward(self, x):
out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
out = swish(self.bn2(self.conv2(out)))
out = self.se(out)
out = self.bn3(self.conv3(out))
if self.has_skip:
if self.training and self.drop_rate > 0:
out = drop_connect(out, self.drop_rate)
out = out + x
return out
class EfficientNet(nn.Module):
def __init__(self, cfg, num_classes=10):
super(EfficientNet, self).__init__()
self.cfg = cfg
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(32)
self.layers = self._make_layers(in_channels=32)
self.linear = nn.Linear(cfg["out_channels"][-1], num_classes)
def _make_layers(self, in_channels):
layers = []
cfg = [
self.cfg[k]
for k in [
"expansion",
"out_channels",
"num_blocks",
"kernel_size",
"stride",
]
]
b = 0
blocks = sum(self.cfg["num_blocks"])
for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
strides = [stride] + [1] * (num_blocks - 1)
for stride in strides:
drop_rate = self.cfg["drop_connect_rate"] * b / blocks
layers.append(
Block(
in_channels,
out_channels,
kernel_size,
stride,
expansion,
se_ratio=0.25,
drop_rate=drop_rate,
)
)
                in_channels = out_channels
                b += 1  # advance the block index so the drop-connect rate actually ramps up
return nn.Sequential(*layers)
def forward(self, x):
out = swish(self.bn1(self.conv1(x)))
out = self.layers(out)
out = F.adaptive_avg_pool2d(out, 1)
out = out.view(out.size(0), -1)
dropout_rate = self.cfg["dropout_rate"]
if self.training and dropout_rate > 0:
out = F.dropout(out, p=dropout_rate)
out = self.linear(out)
return out
def EfficientNetB0():
cfg = {
"num_blocks": [1, 2, 2, 3, 3, 4, 1],
"expansion": [1, 6, 6, 6, 6, 6, 6],
"out_channels": [16, 24, 40, 80, 112, 192, 320],
"kernel_size": [3, 3, 5, 3, 5, 5, 3],
"stride": [1, 2, 2, 2, 1, 2, 1],
"dropout_rate": 0.2,
"drop_connect_rate": 0.2,
}
return EfficientNet(cfg)
def test_efficientNetB0():
load_pytorch_module_and_check(
EfficientNetB0, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/googlenet.py
import torch
import torch.nn as nn
import torch.nn.functional as F
class Inception(nn.Module):
def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
super(Inception, self).__init__()
# 1x1 conv branch
self.b1 = nn.Sequential(
nn.Conv2d(in_planes, n1x1, kernel_size=1),
nn.BatchNorm2d(n1x1),
nn.ReLU(True),
)
# 1x1 conv -> 3x3 conv branch
self.b2 = nn.Sequential(
nn.Conv2d(in_planes, n3x3red, kernel_size=1),
nn.BatchNorm2d(n3x3red),
nn.ReLU(True),
nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
nn.BatchNorm2d(n3x3),
nn.ReLU(True),
)
# 1x1 conv -> 5x5 conv branch
self.b3 = nn.Sequential(
nn.Conv2d(in_planes, n5x5red, kernel_size=1),
nn.BatchNorm2d(n5x5red),
nn.ReLU(True),
nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
nn.BatchNorm2d(n5x5),
nn.ReLU(True),
nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
nn.BatchNorm2d(n5x5),
nn.ReLU(True),
)
# 3x3 pool -> 1x1 conv branch
self.b4 = nn.Sequential(
nn.MaxPool2d(3, stride=1, padding=1),
nn.Conv2d(in_planes, pool_planes, kernel_size=1),
nn.BatchNorm2d(pool_planes),
nn.ReLU(True),
)
def forward(self, x):
y1 = self.b1(x)
y2 = self.b2(x)
y3 = self.b3(x)
y4 = self.b4(x)
return torch.cat([y1, y2, y3, y4], 1)
class GoogLeNet(nn.Module):
def __init__(self):
super(GoogLeNet, self).__init__()
self.pre_layers = nn.Sequential(
nn.Conv2d(3, 192, kernel_size=3, padding=1),
nn.BatchNorm2d(192),
nn.ReLU(True),
)
self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
self.avgpool = nn.AvgPool2d(8, stride=1)
self.linear = nn.Linear(1024, 10)
def forward(self, x):
out = self.pre_layers(x)
out = self.a3(out)
out = self.b3(out)
out = self.maxpool(out)
out = self.a4(out)
out = self.b4(out)
out = self.c4(out)
out = self.d4(out)
out = self.e4(out)
out = self.maxpool(out)
out = self.a5(out)
out = self.b5(out)
out = self.avgpool(out)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def test_googlenet():
load_pytorch_module_and_check(
GoogLeNet, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
import torch
import torch.nn as nn
import torch.nn.functional as F
# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/inceptionv3.py
class BasicConv2d(nn.Module):
def __init__(self, input_channels, output_channels, **kwargs):
super().__init__()
self.conv = nn.Conv2d(input_channels, output_channels, bias=False, **kwargs)
self.bn = nn.BatchNorm2d(output_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
# same naive inception module
class InceptionA(nn.Module):
def __init__(self, input_channels, pool_features):
super().__init__()
self.branch1x1 = BasicConv2d(input_channels, 64, kernel_size=1)
self.branch5x5 = nn.Sequential(
BasicConv2d(input_channels, 48, kernel_size=1),
BasicConv2d(48, 64, kernel_size=5, padding=2),
)
self.branch3x3 = nn.Sequential(
BasicConv2d(input_channels, 64, kernel_size=1),
BasicConv2d(64, 96, kernel_size=3, padding=1),
BasicConv2d(96, 96, kernel_size=3, padding=1),
)
self.branchpool = nn.Sequential(
nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
BasicConv2d(input_channels, pool_features, kernel_size=3, padding=1),
)
def forward(self, x):
# x -> 1x1(same)
branch1x1 = self.branch1x1(x)
# x -> 1x1 -> 5x5(same)
branch5x5 = self.branch5x5(x)
# branch5x5 = self.branch5x5_2(branch5x5)
# x -> 1x1 -> 3x3 -> 3x3(same)
branch3x3 = self.branch3x3(x)
# x -> pool -> 1x1(same)
branchpool = self.branchpool(x)
outputs = [branch1x1, branch5x5, branch3x3, branchpool]
return torch.cat(outputs, 1)
# downsample
# Factorization into smaller convolutions
class InceptionB(nn.Module):
def __init__(self, input_channels):
super().__init__()
self.branch3x3 = BasicConv2d(input_channels, 384, kernel_size=3, stride=2)
self.branch3x3stack = nn.Sequential(
BasicConv2d(input_channels, 64, kernel_size=1),
BasicConv2d(64, 96, kernel_size=3, padding=1),
BasicConv2d(96, 96, kernel_size=3, stride=2),
)
self.branchpool = nn.MaxPool2d(kernel_size=3, stride=2)
def forward(self, x):
# x - > 3x3(downsample)
branch3x3 = self.branch3x3(x)
# x -> 3x3 -> 3x3(downsample)
branch3x3stack = self.branch3x3stack(x)
        # x -> maxpool (downsample)
branchpool = self.branchpool(x)
# """We can use two parallel stride 2 blocks: P and C. P is a pooling
# layer (either average or maximum pooling) the activation, both of
# them are stride 2 the filter banks of which are concatenated as in
# figure 10."""
outputs = [branch3x3, branch3x3stack, branchpool]
return torch.cat(outputs, 1)
# Factorizing Convolutions with Large Filter Size
class InceptionC(nn.Module):
def __init__(self, input_channels, channels_7x7):
super().__init__()
self.branch1x1 = BasicConv2d(input_channels, 192, kernel_size=1)
c7 = channels_7x7
# In theory, we could go even further and argue that one can replace any n × n
# convolution by a 1 × n convolution followed by a n × 1 convolution and the
# computational cost saving increases dramatically as n grows (see figure 6).
self.branch7x7 = nn.Sequential(
BasicConv2d(input_channels, c7, kernel_size=1),
BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)),
BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)),
)
self.branch7x7stack = nn.Sequential(
BasicConv2d(input_channels, c7, kernel_size=1),
BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)),
BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)),
BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)),
BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)),
)
self.branch_pool = nn.Sequential(
nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
BasicConv2d(input_channels, 192, kernel_size=1),
)
def forward(self, x):
# x -> 1x1(same)
branch1x1 = self.branch1x1(x)
# x -> 1layer 1*7 and 7*1 (same)
branch7x7 = self.branch7x7(x)
# x-> 2layer 1*7 and 7*1(same)
branch7x7stack = self.branch7x7stack(x)
# x-> avgpool (same)
branchpool = self.branch_pool(x)
outputs = [branch1x1, branch7x7, branch7x7stack, branchpool]
return torch.cat(outputs, 1)
class InceptionD(nn.Module):
def __init__(self, input_channels):
super().__init__()
self.branch3x3 = nn.Sequential(
BasicConv2d(input_channels, 192, kernel_size=1),
BasicConv2d(192, 320, kernel_size=3, stride=2),
)
self.branch7x7 = nn.Sequential(
BasicConv2d(input_channels, 192, kernel_size=1),
BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3)),
BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0)),
BasicConv2d(192, 192, kernel_size=3, stride=2),
)
self.branchpool = nn.AvgPool2d(kernel_size=3, stride=2)
def forward(self, x):
# x -> 1x1 -> 3x3(downsample)
branch3x3 = self.branch3x3(x)
# x -> 1x1 -> 1x7 -> 7x1 -> 3x3 (downsample)
branch7x7 = self.branch7x7(x)
# x -> avgpool (downsample)
branchpool = self.branchpool(x)
outputs = [branch3x3, branch7x7, branchpool]
return torch.cat(outputs, 1)
# same
class InceptionE(nn.Module):
def __init__(self, input_channels):
super().__init__()
self.branch1x1 = BasicConv2d(input_channels, 320, kernel_size=1)
self.branch3x3_1 = BasicConv2d(input_channels, 384, kernel_size=1)
self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1))
self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0))
self.branch3x3stack_1 = BasicConv2d(input_channels, 448, kernel_size=1)
self.branch3x3stack_2 = BasicConv2d(448, 384, kernel_size=3, padding=1)
self.branch3x3stack_3a = BasicConv2d(
384, 384, kernel_size=(1, 3), padding=(0, 1)
)
self.branch3x3stack_3b = BasicConv2d(
384, 384, kernel_size=(3, 1), padding=(1, 0)
)
self.branch_pool = nn.Sequential(
nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
BasicConv2d(input_channels, 192, kernel_size=1),
)
def forward(self, x):
# x -> 1x1 (same)
branch1x1 = self.branch1x1(x)
# x -> 1x1 -> 3x1
# x -> 1x1 -> 1x3
# concatenate(3x1, 1x3)
# """7. Inception modules with expanded the filter bank outputs.
# This architecture is used on the coarsest (8 × 8) grids to promote
# high dimensional representations, as suggested by principle
# 2 of Section 2."""
branch3x3 = self.branch3x3_1(x)
branch3x3 = [self.branch3x3_2a(branch3x3), self.branch3x3_2b(branch3x3)]
branch3x3 = torch.cat(branch3x3, 1)
# x -> 1x1 -> 3x3 -> 1x3
# x -> 1x1 -> 3x3 -> 3x1
# concatenate(1x3, 3x1)
branch3x3stack = self.branch3x3stack_1(x)
branch3x3stack = self.branch3x3stack_2(branch3x3stack)
branch3x3stack = [
self.branch3x3stack_3a(branch3x3stack),
self.branch3x3stack_3b(branch3x3stack),
]
branch3x3stack = torch.cat(branch3x3stack, 1)
branchpool = self.branch_pool(x)
outputs = [branch1x1, branch3x3, branch3x3stack, branchpool]
return torch.cat(outputs, 1)
class InceptionV3(nn.Module):
def __init__(self, num_classes=100):
super().__init__()
self.Conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3, padding=1)
self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3, padding=1)
self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1)
self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1)
self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3)
# naive inception module
self.Mixed_5b = InceptionA(192, pool_features=32)
self.Mixed_5c = InceptionA(256, pool_features=64)
self.Mixed_5d = InceptionA(288, pool_features=64)
# downsample
self.Mixed_6a = InceptionB(288)
self.Mixed_6b = InceptionC(768, channels_7x7=128)
self.Mixed_6c = InceptionC(768, channels_7x7=160)
self.Mixed_6d = InceptionC(768, channels_7x7=160)
self.Mixed_6e = InceptionC(768, channels_7x7=192)
# downsample
self.Mixed_7a = InceptionD(768)
self.Mixed_7b = InceptionE(1280)
self.Mixed_7c = InceptionE(2048)
# 6*6 feature size
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.dropout = nn.Dropout2d()
self.linear = nn.Linear(2048, num_classes)
def forward(self, x):
# 32 -> 30
x = self.Conv2d_1a_3x3(x)
x = self.Conv2d_2a_3x3(x)
x = self.Conv2d_2b_3x3(x)
x = self.Conv2d_3b_1x1(x)
x = self.Conv2d_4a_3x3(x)
# 30 -> 30
x = self.Mixed_5b(x)
x = self.Mixed_5c(x)
x = self.Mixed_5d(x)
# 30 -> 14
# Efficient Grid Size Reduction to avoid representation
# bottleneck
x = self.Mixed_6a(x)
# 14 -> 14
# """In practice, we have found that employing this factorization does not
# work well on early layers, but it gives very good results on medium
# grid-sizes (On m × m feature maps, where m ranges between 12 and 20).
# On that level, very good results can be achieved by using 1 × 7 convolutions
# followed by 7 × 1 convolutions."""
x = self.Mixed_6b(x)
x = self.Mixed_6c(x)
x = self.Mixed_6d(x)
x = self.Mixed_6e(x)
# 14 -> 6
# Efficient Grid Size Reduction
x = self.Mixed_7a(x)
# 6 -> 6
# We are using this solution only on the coarsest grid,
# since that is the place where producing high dimensional
# sparse representation is the most critical as the ratio of
# local processing (by 1 × 1 convolutions) is increased compared
# to the spatial aggregation."""
x = self.Mixed_7b(x)
x = self.Mixed_7c(x)
# 6 -> 1
x = self.avgpool(x)
x = self.dropout(x)
x = x.view(x.size(0), -1)
x = self.linear(x)
return x
def test_inception_v3():
load_pytorch_module_and_check(
InceptionV3, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
# https://github.com/kuangliu/pytorch-cifar/blob/master/models/lenet.py
'''LeNet in PyTorch.'''
import torch.nn as nn
import torch.nn.functional as F
class LeNet(nn.Module):
def __init__(self):
super(LeNet, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16*5*5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
out = F.relu(self.conv1(x))
out = F.max_pool2d(out, 2)
out = F.relu(self.conv2(out))
out = F.max_pool2d(out, 2)
out = out.view(out.size(0), -1)
out = F.relu(self.fc1(out))
out = F.relu(self.fc2(out))
out = self.fc3(out)
return out
def test_lenet():
load_pytorch_module_and_check(
LeNet, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/mobilenet.py
import torch
import torch.nn as nn
class DepthSeperabelConv2d(nn.Module):
def __init__(self, input_channels, output_channels, kernel_size, **kwargs):
super().__init__()
self.depthwise = nn.Sequential(
nn.Conv2d(
input_channels,
input_channels,
kernel_size,
groups=input_channels,
**kwargs
),
nn.BatchNorm2d(input_channels),
nn.ReLU(inplace=True),
)
self.pointwise = nn.Sequential(
nn.Conv2d(input_channels, output_channels, 1),
nn.BatchNorm2d(output_channels),
nn.ReLU(inplace=True),
)
def forward(self, x):
x = self.depthwise(x)
x = self.pointwise(x)
return x
class BasicConv2d(nn.Module):
def __init__(self, input_channels, output_channels, kernel_size, **kwargs):
super().__init__()
self.conv = nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs)
self.bn = nn.BatchNorm2d(output_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class MobileNetV1(nn.Module):
"""
Args:
        width multiplier: The role of the width multiplier α is to thin
a network uniformly at each layer. For a given
layer and width multiplier α, the number of
input channels M becomes αM and the number of
output channels N becomes αN.
"""
def __init__(self, width_multiplier=1, class_num=100):
super().__init__()
alpha = width_multiplier
self.stem = nn.Sequential(
BasicConv2d(3, int(32 * alpha), 3, padding=1, bias=False),
DepthSeperabelConv2d(
int(32 * alpha), int(64 * alpha), 3, padding=1, bias=False
),
)
# downsample
self.conv1 = nn.Sequential(
DepthSeperabelConv2d(
int(64 * alpha), int(128 * alpha), 3, stride=2, padding=1, bias=False
),
DepthSeperabelConv2d(
int(128 * alpha), int(128 * alpha), 3, padding=1, bias=False
),
)
# downsample
self.conv2 = nn.Sequential(
DepthSeperabelConv2d(
int(128 * alpha), int(256 * alpha), 3, stride=2, padding=1, bias=False
),
DepthSeperabelConv2d(
int(256 * alpha), int(256 * alpha), 3, padding=1, bias=False
),
)
# downsample
self.conv3 = nn.Sequential(
DepthSeperabelConv2d(
int(256 * alpha), int(512 * alpha), 3, stride=2, padding=1, bias=False
),
DepthSeperabelConv2d(
int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False
),
DepthSeperabelConv2d(
int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False
),
DepthSeperabelConv2d(
int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False
),
DepthSeperabelConv2d(
int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False
),
DepthSeperabelConv2d(
int(512 * alpha), int(512 * alpha), 3, padding=1, bias=False
),
)
# downsample
self.conv4 = nn.Sequential(
DepthSeperabelConv2d(
int(512 * alpha), int(1024 * alpha), 3, stride=2, padding=1, bias=False
),
DepthSeperabelConv2d(
int(1024 * alpha), int(1024 * alpha), 3, padding=1, bias=False
),
)
self.fc = nn.Linear(int(1024 * alpha), class_num)
self.avg = nn.AdaptiveAvgPool2d(1)
def forward(self, x):
x = self.stem(x)
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.avg(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def test_mobilenet_v1():
load_pytorch_module_and_check(
MobileNetV1, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
def test_mobilenet_v2():
load_pytorch_module_and_check(
torchvision.models.mobilenet_v2,
input_size=(1, 3, 224, 224),
input_min_val=0,
input_max_val=1,
train_flag=False,
flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init
# https://github.com/xiaolai-sqlai/mobilenetv3/blob/master/mobilenetv3.py
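# Hard approximations used throughout MobileNetV3:
#   h-swish(x)   = x * ReLU6(x + 3) / 6
#   h-sigmoid(x) = ReLU6(x + 3) / 6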
class hswish(nn.Module):
def forward(self, x):
out = x * F.relu6(x + 3, inplace=True) / 6
return out
class hsigmoid(nn.Module):
def forward(self, x):
out = F.relu6(x + 3, inplace=True) / 6
return out
class SeModule(nn.Module):
def __init__(self, in_size, reduction=4):
super(SeModule, self).__init__()
self.se = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(
in_size,
in_size // reduction,
kernel_size=1,
stride=1,
padding=0,
bias=False,
),
nn.BatchNorm2d(in_size // reduction),
nn.ReLU(inplace=True),
nn.Conv2d(
in_size // reduction,
in_size,
kernel_size=1,
stride=1,
padding=0,
bias=False,
),
nn.BatchNorm2d(in_size),
hsigmoid(),
)
def forward(self, x):
return x * self.se(x)
class Block(nn.Module):
"""expand + depthwise + pointwise"""
def __init__(
self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride
):
super(Block, self).__init__()
self.stride = stride
self.se = semodule
self.conv1 = nn.Conv2d(
in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False
)
self.bn1 = nn.BatchNorm2d(expand_size)
self.nolinear1 = nolinear
self.conv2 = nn.Conv2d(
expand_size,
expand_size,
kernel_size=kernel_size,
stride=stride,
padding=kernel_size // 2,
groups=expand_size,
bias=False,
)
self.bn2 = nn.BatchNorm2d(expand_size)
self.nolinear2 = nolinear
self.conv3 = nn.Conv2d(
expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False
)
self.bn3 = nn.BatchNorm2d(out_size)
self.shortcut = nn.Sequential()
if stride == 1 and in_size != out_size:
self.shortcut = nn.Sequential(
nn.Conv2d(
in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False
),
nn.BatchNorm2d(out_size),
)
def forward(self, x):
out = self.nolinear1(self.bn1(self.conv1(x)))
out = self.nolinear2(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
        if self.se is not None:
out = self.se(out)
out = out + self.shortcut(x) if self.stride == 1 else out
return out
class MobileNetV3_Large(nn.Module):
def __init__(self, num_classes=1000):
super(MobileNetV3_Large, self).__init__()
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.hs1 = hswish()
self.bneck = nn.Sequential(
Block(3, 16, 16, 16, nn.ReLU(inplace=True), None, 1),
Block(3, 16, 64, 24, nn.ReLU(inplace=True), None, 2),
Block(3, 24, 72, 24, nn.ReLU(inplace=True), None, 1),
Block(5, 24, 72, 40, nn.ReLU(inplace=True), SeModule(40), 2),
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
Block(3, 40, 240, 80, hswish(), None, 2),
Block(3, 80, 200, 80, hswish(), None, 1),
Block(3, 80, 184, 80, hswish(), None, 1),
Block(3, 80, 184, 80, hswish(), None, 1),
Block(3, 80, 480, 112, hswish(), SeModule(112), 1),
Block(3, 112, 672, 112, hswish(), SeModule(112), 1),
Block(5, 112, 672, 160, hswish(), SeModule(160), 1),
Block(5, 160, 672, 160, hswish(), SeModule(160), 2),
Block(5, 160, 960, 160, hswish(), SeModule(160), 1),
)
self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(960)
self.hs2 = hswish()
self.linear3 = nn.Linear(960, 1280)
self.bn3 = nn.BatchNorm1d(1280)
self.hs3 = hswish()
self.linear4 = nn.Linear(1280, num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode="fan_out")
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
out = self.hs1(self.bn1(self.conv1(x)))
out = self.bneck(out)
out = self.hs2(self.bn2(self.conv2(out)))
out = F.avg_pool2d(out, 7)
out = out.view(out.size(0), -1)
out = self.hs3(self.bn3(self.linear3(out)))
out = self.linear4(out)
return out
class MobileNetV3_Small(nn.Module):
def __init__(self, num_classes=1000):
super(MobileNetV3_Small, self).__init__()
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.hs1 = hswish()
self.bneck = nn.Sequential(
Block(3, 16, 16, 16, nn.ReLU(inplace=True), SeModule(16), 2),
Block(3, 16, 72, 24, nn.ReLU(inplace=True), None, 2),
Block(3, 24, 88, 24, nn.ReLU(inplace=True), None, 1),
Block(5, 24, 96, 40, hswish(), SeModule(40), 2),
Block(5, 40, 240, 40, hswish(), SeModule(40), 1),
Block(5, 40, 240, 40, hswish(), SeModule(40), 1),
Block(5, 40, 120, 48, hswish(), SeModule(48), 1),
Block(5, 48, 144, 48, hswish(), SeModule(48), 1),
Block(5, 48, 288, 96, hswish(), SeModule(96), 2),
Block(5, 96, 576, 96, hswish(), SeModule(96), 1),
Block(5, 96, 576, 96, hswish(), SeModule(96), 1),
)
self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(576)
self.hs2 = hswish()
self.linear3 = nn.Linear(576, 1280)
self.bn3 = nn.BatchNorm1d(1280)
self.hs3 = hswish()
self.linear4 = nn.Linear(1280, num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode="fan_out")
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
out = self.hs1(self.bn1(self.conv1(x)))
out = self.bneck(out)
out = self.hs2(self.bn2(self.conv2(out)))
out = F.avg_pool2d(out, 7)
out = out.view(out.size(0), -1)
out = self.hs3(self.bn3(self.linear3(out)))
out = self.linear4(out)
return out
def test_MobileNetV3_Large():
load_pytorch_module_and_check(
MobileNetV3_Large, input_size=(1, 3, 224, 224), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
# https://github.com/kuangliu/pytorch-cifar/blob/master/models/pnasnet.py
'''PNASNet in PyTorch.
Paper: Progressive Neural Architecture Search
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class SepConv(nn.Module):
'''Separable Convolution.'''
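    # Note: implemented as a single grouped conv (groups=in_planes) plus BN;
    # the pointwise 1x1 stage of a full depthwise-separable conv is omitted here.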
def __init__(self, in_planes, out_planes, kernel_size, stride):
super(SepConv, self).__init__()
self.conv1 = nn.Conv2d(in_planes, out_planes,
kernel_size, stride,
padding=(kernel_size-1)//2,
bias=False, groups=in_planes)
self.bn1 = nn.BatchNorm2d(out_planes)
def forward(self, x):
return self.bn1(self.conv1(x))
class CellA(nn.Module):
def __init__(self, in_planes, out_planes, stride=1):
super(CellA, self).__init__()
self.stride = stride
self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
if stride==2:
self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(out_planes)
def forward(self, x):
y1 = self.sep_conv1(x)
y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
if self.stride==2:
y2 = self.bn1(self.conv1(y2))
return F.relu(y1+y2)
class CellB(nn.Module):
def __init__(self, in_planes, out_planes, stride=1):
super(CellB, self).__init__()
self.stride = stride
# Left branch
self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride)
# Right branch
self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride)
if stride==2:
self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(out_planes)
# Reduce channels
self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(out_planes)
def forward(self, x):
# Left branch
y1 = self.sep_conv1(x)
y2 = self.sep_conv2(x)
# Right branch
y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
if self.stride==2:
y3 = self.bn1(self.conv1(y3))
y4 = self.sep_conv3(x)
# Concat & reduce channels
b1 = F.relu(y1+y2)
b2 = F.relu(y3+y4)
y = torch.cat([b1,b2], 1)
return F.relu(self.bn2(self.conv2(y)))
class PNASNet(nn.Module):
def __init__(self, cell_type, num_cells, num_planes):
super(PNASNet, self).__init__()
self.in_planes = num_planes
self.cell_type = cell_type
self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(num_planes)
self.layer1 = self._make_layer(num_planes, num_cells=6)
self.layer2 = self._downsample(num_planes*2)
self.layer3 = self._make_layer(num_planes*2, num_cells=6)
self.layer4 = self._downsample(num_planes*4)
self.layer5 = self._make_layer(num_planes*4, num_cells=6)
self.linear = nn.Linear(num_planes*4, 10)
def _make_layer(self, planes, num_cells):
layers = []
for _ in range(num_cells):
layers.append(self.cell_type(self.in_planes, planes, stride=1))
self.in_planes = planes
return nn.Sequential(*layers)
def _downsample(self, planes):
layer = self.cell_type(self.in_planes, planes, stride=2)
self.in_planes = planes
return layer
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = self.layer5(out)
out = F.avg_pool2d(out, 8)
out = self.linear(out.view(out.size(0), -1))
return out
def PNASNetA():
return PNASNet(CellA, num_cells=6, num_planes=44)
def PNASNetB():
return PNASNet(CellB, num_cells=6, num_planes=32)
def test_pnasnet():
load_pytorch_module_and_check(
PNASNetA, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
'''Pre-activation ResNet in PyTorch.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks. arXiv:1603.05027
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class PreActBlock(nn.Module):
'''Pre-activation version of the BasicBlock.'''
expansion = 1
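    # Pre-activation: BN + ReLU run before each conv, and the projection shortcut
    # (when needed) is applied to the activated signal rather than the raw input.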
def __init__(self, in_planes, planes, stride=1):
super(PreActBlock, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
)
def forward(self, x):
out = F.relu(self.bn1(x))
shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
out = self.conv1(out)
out = self.conv2(F.relu(self.bn2(out)))
out += shortcut
return out
class PreActBottleneck(nn.Module):
'''Pre-activation version of the original Bottleneck module.'''
expansion = 4
def __init__(self, in_planes, planes, stride=1):
super(PreActBottleneck, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
)
def forward(self, x):
out = F.relu(self.bn1(x))
shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
out = self.conv1(out)
out = self.conv2(F.relu(self.bn2(out)))
out = self.conv3(F.relu(self.bn3(out)))
out += shortcut
return out
class PreActResNet(nn.Module):
def __init__(self, block, num_blocks, num_classes=10):
super(PreActResNet, self).__init__()
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512*block.expansion, num_classes)
def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes * block.expansion
return nn.Sequential(*layers)
def forward(self, x):
out = self.conv1(x)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def PreActResNet18():
return PreActResNet(PreActBlock, [2,2,2,2])
def PreActResNet34():
return PreActResNet(PreActBlock, [3,4,6,3])
def PreActResNet50():
return PreActResNet(PreActBottleneck, [3,4,6,3])
def PreActResNet101():
return PreActResNet(PreActBottleneck, [3,4,23,3])
def PreActResNet152():
return PreActResNet(PreActBottleneck, [3,8,36,3])
def test_preact_resnet():
load_pytorch_module_and_check(
PreActResNet18, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow", oneflow_code_gen_flag=True
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
import torch
import torch.nn as nn
import torch.nn.functional as F
# https://github.com/kuangliu/pytorch-cifar/blob/master/models/regnet.py
class SE(nn.Module):
"""Squeeze-and-Excitation block."""
def __init__(self, in_planes, se_planes):
super(SE, self).__init__()
self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True)
self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True)
def forward(self, x):
out = F.adaptive_avg_pool2d(x, (1, 1))
out = F.relu(self.se1(out))
out = self.se2(out).sigmoid()
out = x * out
return out
class Block(nn.Module):
def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio):
super(Block, self).__init__()
# 1x1
w_b = int(round(w_out * bottleneck_ratio))
self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(w_b)
# 3x3
num_groups = w_b // group_width
self.conv2 = nn.Conv2d(
w_b,
w_b,
kernel_size=3,
stride=stride,
padding=1,
groups=num_groups,
bias=False,
)
self.bn2 = nn.BatchNorm2d(w_b)
# se
self.with_se = se_ratio > 0
if self.with_se:
w_se = int(round(w_in * se_ratio))
self.se = SE(w_b, w_se)
# 1x1
self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(w_out)
self.shortcut = nn.Sequential()
if stride != 1 or w_in != w_out:
self.shortcut = nn.Sequential(
nn.Conv2d(w_in, w_out, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(w_out),
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
if self.with_se:
out = self.se(out)
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class RegNet(nn.Module):
def __init__(self, cfg, num_classes=10):
super(RegNet, self).__init__()
self.cfg = cfg
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(0)
self.layer2 = self._make_layer(1)
self.layer3 = self._make_layer(2)
self.layer4 = self._make_layer(3)
self.linear = nn.Linear(self.cfg["widths"][-1], num_classes)
def _make_layer(self, idx):
depth = self.cfg["depths"][idx]
width = self.cfg["widths"][idx]
stride = self.cfg["strides"][idx]
group_width = self.cfg["group_width"]
bottleneck_ratio = self.cfg["bottleneck_ratio"]
se_ratio = self.cfg["se_ratio"]
layers = []
for i in range(depth):
s = stride if i == 0 else 1
layers.append(
Block(self.in_planes, width, s, group_width, bottleneck_ratio, se_ratio)
)
self.in_planes = width
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.adaptive_avg_pool2d(out, (1, 1))
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def RegNetX_200MF():
cfg = {
"depths": [1, 1, 4, 7],
"widths": [24, 56, 152, 368],
"strides": [1, 1, 2, 2],
"group_width": 8,
"bottleneck_ratio": 1,
"se_ratio": 0,
}
return RegNet(cfg)
def RegNetX_400MF():
cfg = {
"depths": [1, 2, 7, 12],
"widths": [32, 64, 160, 384],
"strides": [1, 1, 2, 2],
"group_width": 16,
"bottleneck_ratio": 1,
"se_ratio": 0,
}
return RegNet(cfg)
def RegNetY_400MF():
cfg = {
"depths": [1, 2, 7, 12],
"widths": [32, 64, 160, 384],
"strides": [1, 1, 2, 2],
"group_width": 16,
"bottleneck_ratio": 1,
"se_ratio": 0.25,
}
return RegNet(cfg)
def test_regnet():
load_pytorch_module_and_check(
RegNetX_200MF, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
def test_resnet18():
load_pytorch_module_and_check(
torchvision.models.resnet18,
input_size=(1, 3, 224, 224),
train_flag=False,
flow_weight_dir="/tmp/oneflow",
oneflow_code_gen_flag=True
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
import torch
import torch.nn as nn
import torch.nn.functional as F
# https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnext.py
class Block(nn.Module):
"""Grouped convolution block."""
expansion = 2
def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
super(Block, self).__init__()
group_width = cardinality * bottleneck_width
self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(group_width)
self.conv2 = nn.Conv2d(
group_width,
group_width,
kernel_size=3,
stride=stride,
padding=1,
groups=cardinality,
bias=False,
)
self.bn2 = nn.BatchNorm2d(group_width)
self.conv3 = nn.Conv2d(
group_width, self.expansion * group_width, kernel_size=1, bias=False
)
self.bn3 = nn.BatchNorm2d(self.expansion * group_width)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion * group_width:
self.shortcut = nn.Sequential(
nn.Conv2d(
in_planes,
self.expansion * group_width,
kernel_size=1,
stride=stride,
bias=False,
),
nn.BatchNorm2d(self.expansion * group_width),
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class ResNeXt(nn.Module):
def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
super(ResNeXt, self).__init__()
self.cardinality = cardinality
self.bottleneck_width = bottleneck_width
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(num_blocks[0], 1)
self.layer2 = self._make_layer(num_blocks[1], 2)
self.layer3 = self._make_layer(num_blocks[2], 2)
# self.layer4 = self._make_layer(num_blocks[3], 2)
self.linear = nn.Linear(cardinality * bottleneck_width * 8, num_classes)
def _make_layer(self, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
layers = []
for stride in strides:
layers.append(
Block(self.in_planes, self.cardinality, self.bottleneck_width, stride)
)
self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
# Increase bottleneck_width by 2 after each stage.
self.bottleneck_width *= 2
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
# out = self.layer4(out)
out = F.avg_pool2d(out, 8)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def ResNeXt29_2x64d():
return ResNeXt(num_blocks=[3, 3, 3], cardinality=2, bottleneck_width=64)
def ResNeXt29_4x64d():
return ResNeXt(num_blocks=[3, 3, 3], cardinality=4, bottleneck_width=64)
def ResNeXt29_8x64d():
return ResNeXt(num_blocks=[3, 3, 3], cardinality=8, bottleneck_width=64)
def ResNeXt29_32x4d():
return ResNeXt(num_blocks=[3, 3, 3], cardinality=32, bottleneck_width=4)
def test_resnext():
load_pytorch_module_and_check(
ResNeXt29_2x64d, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
import torch
import torch.nn as nn
import torch.nn.functional as F
# https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/senet.py
class BasicResidualSEBlock(nn.Module):
expansion = 1
def __init__(self, in_channels, out_channels, stride, r=16):
super().__init__()
self.residual = nn.Sequential(
nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * self.expansion, 3, padding=1),
nn.BatchNorm2d(out_channels * self.expansion),
nn.ReLU(inplace=True),
)
self.shortcut = nn.Sequential()
if stride != 1 or in_channels != out_channels * self.expansion:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * self.expansion, 1, stride=stride),
nn.BatchNorm2d(out_channels * self.expansion),
)
self.squeeze = nn.AdaptiveAvgPool2d(1)
self.excitation = nn.Sequential(
nn.Linear(
out_channels * self.expansion, out_channels * self.expansion // r
),
nn.ReLU(inplace=True),
nn.Linear(
out_channels * self.expansion // r, out_channels * self.expansion
),
nn.Sigmoid(),
)
def forward(self, x):
shortcut = self.shortcut(x)
residual = self.residual(x)
squeeze = self.squeeze(residual)
squeeze = squeeze.view(squeeze.size(0), -1)
excitation = self.excitation(squeeze)
excitation = excitation.view(residual.size(0), residual.size(1), 1, 1)
x = residual * excitation.expand_as(residual) + shortcut
return F.relu(x)
class BottleneckResidualSEBlock(nn.Module):
expansion = 4
def __init__(self, in_channels, out_channels, stride, r=16):
super().__init__()
self.residual = nn.Sequential(
nn.Conv2d(in_channels, out_channels, 1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels, 3, stride=stride, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * self.expansion, 1),
nn.BatchNorm2d(out_channels * self.expansion),
nn.ReLU(inplace=True),
)
self.squeeze = nn.AdaptiveAvgPool2d(1)
self.excitation = nn.Sequential(
nn.Linear(
out_channels * self.expansion, out_channels * self.expansion // r
),
nn.ReLU(inplace=True),
nn.Linear(
out_channels * self.expansion // r, out_channels * self.expansion
),
nn.Sigmoid(),
)
self.shortcut = nn.Sequential()
if stride != 1 or in_channels != out_channels * self.expansion:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * self.expansion, 1, stride=stride),
nn.BatchNorm2d(out_channels * self.expansion),
)
def forward(self, x):
shortcut = self.shortcut(x)
residual = self.residual(x)
squeeze = self.squeeze(residual)
squeeze = squeeze.view(squeeze.size(0), -1)
excitation = self.excitation(squeeze)
excitation = excitation.view(residual.size(0), residual.size(1), 1, 1)
x = residual * excitation.expand_as(residual) + shortcut
return F.relu(x)
class SEResNet(nn.Module):
def __init__(self, block, block_num, class_num=100):
super().__init__()
self.in_channels = 64
self.pre = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True)
)
self.stage1 = self._make_stage(block, block_num[0], 64, 1)
self.stage2 = self._make_stage(block, block_num[1], 128, 2)
self.stage3 = self._make_stage(block, block_num[2], 256, 2)
self.stage4 = self._make_stage(block, block_num[3], 512, 2)
self.linear = nn.Linear(self.in_channels, class_num)
def forward(self, x):
x = self.pre(x)
x = self.stage1(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = F.adaptive_avg_pool2d(x, 1)
x = x.view(x.size(0), -1)
x = self.linear(x)
return x
def _make_stage(self, block, num, out_channels, stride):
layers = []
layers.append(block(self.in_channels, out_channels, stride))
self.in_channels = out_channels * block.expansion
while num - 1:
layers.append(block(self.in_channels, out_channels, 1))
num -= 1
return nn.Sequential(*layers)
def seresnet18():
return SEResNet(BasicResidualSEBlock, [2, 2, 2, 2])
def seresnet34():
return SEResNet(BasicResidualSEBlock, [3, 4, 6, 3])
def seresnet50():
return SEResNet(BottleneckResidualSEBlock, [3, 4, 6, 3])
def seresnet101():
return SEResNet(BottleneckResidualSEBlock, [3, 4, 23, 3])
def seresnet152():
return SEResNet(BottleneckResidualSEBlock, [3, 8, 36, 3])
def test_senet():
load_pytorch_module_and_check(
seresnet18, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
import torch
import torch.nn as nn
import torch.nn.functional as F
class ShuffleBlock(nn.Module):
def __init__(self, groups):
super(ShuffleBlock, self).__init__()
self.groups = groups
def forward(self, x):
"""Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]"""
N, C, H, W = x.size()
g = self.groups
return x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)
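# Illustrative sanity check (not part of the original file): with C=4 channels
# and groups=2, the shuffle reorders channels [0, 1, 2, 3] -> [0, 2, 1, 3].
#   >>> x = torch.arange(4.0).view(1, 4, 1, 1)
#   >>> ShuffleBlock(groups=2)(x).flatten().tolist()
#   [0.0, 2.0, 1.0, 3.0]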
class Bottleneck(nn.Module):
def __init__(self, in_planes, out_planes, stride, groups):
super(Bottleneck, self).__init__()
self.stride = stride
mid_planes = out_planes // 4
g = 1 if in_planes == 24 else groups
self.conv1 = nn.Conv2d(
in_planes, mid_planes, kernel_size=1, groups=g, bias=False
)
self.bn1 = nn.BatchNorm2d(mid_planes)
self.shuffle1 = ShuffleBlock(groups=g)
self.conv2 = nn.Conv2d(
mid_planes,
mid_planes,
kernel_size=3,
stride=stride,
padding=1,
groups=mid_planes,
bias=False,
)
self.bn2 = nn.BatchNorm2d(mid_planes)
self.conv3 = nn.Conv2d(
mid_planes, out_planes, kernel_size=1, groups=groups, bias=False
)
self.bn3 = nn.BatchNorm2d(out_planes)
self.shortcut = nn.Sequential()
if stride == 2:
self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.shuffle1(out)
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
res = self.shortcut(x)
out = (
F.relu(torch.cat([out, res], 1)) if self.stride == 2 else F.relu(out + res)
)
return out
class ShuffleNet(nn.Module):
def __init__(self, cfg):
super(ShuffleNet, self).__init__()
out_planes = cfg["out_planes"]
num_blocks = cfg["num_blocks"]
groups = cfg["groups"]
self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(24)
self.in_planes = 24
self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
self.linear = nn.Linear(out_planes[2], 10)
def _make_layer(self, out_planes, num_blocks, groups):
layers = []
for i in range(num_blocks):
stride = 2 if i == 0 else 1
cat_planes = self.in_planes if i == 0 else 0
layers.append(
Bottleneck(
self.in_planes,
out_planes - cat_planes,
stride=stride,
groups=groups,
)
)
self.in_planes = out_planes
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def ShuffleNetG2():
cfg = {"out_planes": [200, 400, 800], "num_blocks": [4, 8, 4], "groups": 2}
return ShuffleNet(cfg)
def ShuffleNetG3():
cfg = {"out_planes": [240, 480, 960], "num_blocks": [4, 8, 4], "groups": 3}
return ShuffleNet(cfg)
def test_shufflenet_v1_g2():
load_pytorch_module_and_check(
ShuffleNetG2, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
import torch
import torch.nn as nn
import torch.nn.functional as F
# https://github.com/kuangliu/pytorch-cifar/blob/master/models/shufflenetv2.py
class ShuffleBlock(nn.Module):
def __init__(self, groups=2):
super(ShuffleBlock, self).__init__()
self.groups = groups
def forward(self, x):
"""Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]"""
N, C, H, W = x.size()
g = self.groups
return x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)
class SplitBlock(nn.Module):
def __init__(self, ratio):
super(SplitBlock, self).__init__()
self.ratio = ratio
def forward(self, x):
c = int(x.size(1) * self.ratio)
return x[:, :c, :, :], x[:, c:, :, :]
class BasicBlock(nn.Module):
def __init__(self, in_channels, split_ratio=0.5):
super(BasicBlock, self).__init__()
self.split = SplitBlock(split_ratio)
in_channels = int(in_channels * split_ratio)
self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(in_channels)
self.conv2 = nn.Conv2d(
in_channels,
in_channels,
kernel_size=3,
stride=1,
padding=1,
groups=in_channels,
bias=False,
)
self.bn2 = nn.BatchNorm2d(in_channels)
self.conv3 = nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(in_channels)
self.shuffle = ShuffleBlock()
def forward(self, x):
x1, x2 = self.split(x)
out = F.relu(self.bn1(self.conv1(x2)))
out = self.bn2(self.conv2(out))
out = F.relu(self.bn3(self.conv3(out)))
out = torch.cat([x1, out], 1)
out = self.shuffle(out)
return out
class DownBlock(nn.Module):
def __init__(self, in_channels, out_channels):
super(DownBlock, self).__init__()
mid_channels = out_channels // 2
# left
self.conv1 = nn.Conv2d(
in_channels,
in_channels,
kernel_size=3,
stride=2,
padding=1,
groups=in_channels,
bias=False,
)
self.bn1 = nn.BatchNorm2d(in_channels)
self.conv2 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(mid_channels)
# right
self.conv3 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(mid_channels)
self.conv4 = nn.Conv2d(
mid_channels,
mid_channels,
kernel_size=3,
stride=2,
padding=1,
groups=mid_channels,
bias=False,
)
self.bn4 = nn.BatchNorm2d(mid_channels)
self.conv5 = nn.Conv2d(mid_channels, mid_channels, kernel_size=1, bias=False)
self.bn5 = nn.BatchNorm2d(mid_channels)
self.shuffle = ShuffleBlock()
def forward(self, x):
# left
out1 = self.bn1(self.conv1(x))
out1 = F.relu(self.bn2(self.conv2(out1)))
# right
out2 = F.relu(self.bn3(self.conv3(x)))
out2 = self.bn4(self.conv4(out2))
out2 = F.relu(self.bn5(self.conv5(out2)))
# concat
out = torch.cat([out1, out2], 1)
out = self.shuffle(out)
return out
class ShuffleNetV2(nn.Module):
def __init__(self, net_size=0.5):
super(ShuffleNetV2, self).__init__()
out_channels = configs[net_size]["out_channels"]
num_blocks = configs[net_size]["num_blocks"]
self.conv1 = nn.Conv2d(3, 24, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(24)
self.in_channels = 24
self.layer1 = self._make_layer(out_channels[0], num_blocks[0])
self.layer2 = self._make_layer(out_channels[1], num_blocks[1])
self.layer3 = self._make_layer(out_channels[2], num_blocks[2])
self.conv2 = nn.Conv2d(
out_channels[2],
out_channels[3],
kernel_size=1,
stride=1,
padding=0,
bias=False,
)
self.bn2 = nn.BatchNorm2d(out_channels[3])
self.linear = nn.Linear(out_channels[3], 10)
def _make_layer(self, out_channels, num_blocks):
layers = [DownBlock(self.in_channels, out_channels)]
for i in range(num_blocks):
layers.append(BasicBlock(out_channels))
self.in_channels = out_channels
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
# out = F.max_pool2d(out, 3, stride=2, padding=1)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.relu(self.bn2(self.conv2(out)))
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
configs = {
0.5: {"out_channels": (48, 96, 192, 1024), "num_blocks": (3, 7, 3)},
1: {"out_channels": (116, 232, 464, 1024), "num_blocks": (3, 7, 3)},
1.5: {"out_channels": (176, 352, 704, 1024), "num_blocks": (3, 7, 3)},
2: {"out_channels": (224, 488, 976, 2048), "num_blocks": (3, 7, 3)},
}
def test_shufflenet_v2():
load_pytorch_module_and_check(
ShuffleNetV2, input_size=(1, 3, 32, 32), train_flag=False, flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
def test_squeezenet():
load_pytorch_module_and_check(
torchvision.models.SqueezeNet,
input_size=(1, 3, 224, 224),
train_flag=False,
flow_weight_dir="/tmp/oneflow"
)
def test_squeezenet1_0():
load_pytorch_module_and_check(
torchvision.models.squeezenet1_0,
input_size=(1, 3, 224, 224),
train_flag=False,
flow_weight_dir="/tmp/oneflow"
)
def test_squeezenet1_1():
load_pytorch_module_and_check(
torchvision.models.squeezenet1_1,
input_size=(1, 3, 224, 224),
train_flag=False,
flow_weight_dir="/tmp/oneflow"
)
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torchvision
from oneflow_onnx.x2oneflow.util import load_pytorch_module_and_check
def test_vgg16():
load_pytorch_module_and_check(
torchvision.models.vgg16,
input_size=(1, 3, 224, 224),
train_flag=False,
flow_weight_dir="/tmp/oneflow",
oneflow_code_gen_flag=True
)
...
@@ -229,8 +229,9 @@ class BackendHandler:
         else:
             for i in range(len(cls.OP_OUTPUS) - 1):
                 pre_name = pre_name + '{}, '.format(cls.OP_OUTPUS[i])
             pre_name = pre_name + '{} = '.format(cls.OP_OUTPUS[len(cls.OP_OUTPUS) - 1])
-        cls.ONEFLOW_CODE_GEN.append(pre_name + cls.code_gen(flow_func, kwargs))
+        if (pre_name + cls.code_gen(flow_func, kwargs)) not in cls.ONEFLOW_CODE_GEN:
+            cls.ONEFLOW_CODE_GEN.append(pre_name + cls.code_gen(flow_func, kwargs))
         return flow_func(**kwargs)

     @classmethod
...
@@ -266,9 +267,23 @@ class BackendHandler:
         func += '('
         for k, v in kwargs.items():
             func += str(k) + '='
-            if type(v) == oneflow_api.LazyConsistentBlob:
+            if type(v) == list:
+                new_v = []
+                for x in v:
+                    if type(x) == oneflow_api.LazyConsistentBlob:
+                        new_v.append(cls.ONEFLOW_BLOBNAME_MAP[x])
+                    else:
+                        new_v.append(x)
+                v = new_v
+                func += '['
+                for x in v:
+                    func += str(x) + ', '
+                func += '], '
+            elif type(v) == oneflow_api.LazyConsistentBlob:
                 v = cls.ONEFLOW_BLOBNAME_MAP[v]
                 func += str(v) + ', '
+            else:
+                func += str(v) + ', '
         func += ')\n'
         return func
...
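Taken together, the two handler changes above do two things: `code_gen` can now render list-valued kwargs (such as the `inputs` list that `Concat` passes) by mapping each `LazyConsistentBlob` back to its recorded name, and the new membership check stops repeated node visits from appending the same generated line twice. A hedged sketch of one resulting entry in `cls.ONEFLOW_CODE_GEN`, assuming hypothetical blob names `out`, `x1`, and `x2`:

```python
# Hypothetical generated entry for an ONNX Concat node; the blob names and the
# flow.concat spelling are illustrative, not taken from the source.
"out = flow.concat(inputs=[x1, x2, ], axis=1, )\n"
```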
...
@@ -106,6 +106,9 @@ class Flatten(BackendHandler):
 class Concat(BackendHandler):
     @classmethod
     def _common(cls, node, tensor_dict, **kwargs):
+        for x in node.input_tensor_names:
+            if tensor_dict[x] not in oneflow_blobname_map:
+                oneflow_blobname_map[tensor_dict[x]] = x
         inputs = [tensor_dict[inp] for inp in node.input_tensor_names]
         return cls.run_onnx_node(node, tensor_dict, inputs=[inputs])
...
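This registration step is what makes the list rendering above work for `Concat`: every input blob gets an entry in `oneflow_blobname_map` before `run_onnx_node` fires, so `code_gen` can resolve each blob back to a printable name.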
...
@@ -49,23 +49,61 @@ class BatchNormalization(BackendHandler):
     @classmethod
     def _common(cls, node, tensor_dict, **kwargs):
-        def randomString(stringLength=8):
-            letters = string.ascii_lowercase
-            return "".join(random.choice(letters) for i in range(stringLength))
-
-        name = "bn_" + randomString()
-        # update oneflow flow.layers.batch_normalization to avoid this
-        # it does not work on model with mulitple bn
-        cls.copy_variable_file(node.input_tensor_names[1], name + "-gamma")
-        cls.copy_variable_file(node.input_tensor_names[2], name + "-beta")
-        cls.copy_variable_file(node.input_tensor_names[3], name + "-moving_mean")
-        cls.copy_variable_file(node.input_tensor_names[4], name + "-moving_variance")
-        node.input_tensor_names = node.input_tensor_names[:1]
-        return [
-            cls.run_onnx_node(node, tensor_dict, name=name, **kwargs, attrs={"axis": 1})
-        ]
+        x = tensor_dict[node.input_tensor_names[0]]
+
+        # code gen for batchnorm
+        func = 'weight_initializer = flow.truncated_normal(0.1)\n'
+        if func not in oneflow_code_gen:
+            oneflow_code_gen.append(func)
+        func = 'weight_regularizer = flow.regularizers.l2(0.0005)\n'
+        if func not in oneflow_code_gen:
+            oneflow_code_gen.append(func)
+
+        scale = tensor_dict[node.input_tensor_names[1]]
+        offset = tensor_dict[node.input_tensor_names[2]]
+        mean = tensor_dict[node.input_tensor_names[3]]
+        variance = tensor_dict[node.input_tensor_names[4]]
+        epsilon = node.attrs.get("epsilon", 1e-5)
+
+        func = '{} = flow.get_variable('.format(node.input_tensor_names[1])
+        func = func + 'name={}, '.format("'"+node.input_tensor_names[1]+"'")
+        func = func + 'shape={}, '.format(list(scale.shape))
+        func = func + 'initializer=weight_initializer, '
+        func = func + 'regularizer=weight_regularizer)\n'
+        if func not in oneflow_code_gen:
+            oneflow_code_gen.append(func)
+
+        func = '{} = flow.get_variable('.format(node.input_tensor_names[2])
+        func = func + 'name={}, '.format("'"+node.input_tensor_names[2]+"'")
+        func = func + 'shape={}, '.format(list(offset.shape))
+        func = func + 'initializer=weight_initializer, '
+        func = func + 'regularizer=weight_regularizer)\n'
+        if func not in oneflow_code_gen:
+            oneflow_code_gen.append(func)
+
+        func = '{} = flow.get_variable('.format(node.input_tensor_names[3])
+        func = func + 'name={}, '.format("'"+node.input_tensor_names[3]+"'")
+        func = func + 'shape={}, '.format(list(mean.shape))
+        func = func + 'initializer=weight_initializer, '
+        func = func + 'regularizer=weight_regularizer)\n'
+        if func not in oneflow_code_gen:
+            oneflow_code_gen.append(func)
+
+        func = '{} = flow.get_variable('.format(node.input_tensor_names[4])
+        func = func + 'name={}, '.format("'"+node.input_tensor_names[4]+"'")
+        func = func + 'shape={}, '.format(list(variance.shape))
+        func = func + 'initializer=weight_initializer, '
+        func = func + 'regularizer=weight_regularizer)\n'
+        if func not in oneflow_code_gen:
+            oneflow_code_gen.append(func)
+
+        func = '{} = flow.nn.batch_normalization('.format(node.output_tensor_names[0])
+        func = func + 'x={}, mean={}, variance={}, offset={}, scale={}, axis=1, variance_epsilon={})\n'.format(node.input_tensor_names[0], node.input_tensor_names[3],
+            node.input_tensor_names[4], node.input_tensor_names[2], node.input_tensor_names[1], epsilon)
+        if func not in oneflow_code_gen:
+            oneflow_code_gen.append(func)
+
+        return flow.nn.batch_normalization(x, mean=mean, variance=variance, offset=offset, scale=scale, axis=1, variance_epsilon=epsilon)

     @classmethod
     def version_1(cls, node, tensor_dict, **kwargs):
...
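For reference, a sketch of what this handler accumulates in `oneflow_code_gen` for a single BatchNormalization node. All blob names (`in_0`, `bn1_gamma`, ...) and the shape `[64]` are hypothetical; the real ones come from the ONNX graph:

```python
# Hypothetical accumulated code-gen output for one BN node (names illustrative):
weight_initializer = flow.truncated_normal(0.1)
weight_regularizer = flow.regularizers.l2(0.0005)
bn1_gamma = flow.get_variable(name='bn1_gamma', shape=[64], initializer=weight_initializer, regularizer=weight_regularizer)
bn1_beta = flow.get_variable(name='bn1_beta', shape=[64], initializer=weight_initializer, regularizer=weight_regularizer)
bn1_mean = flow.get_variable(name='bn1_mean', shape=[64], initializer=weight_initializer, regularizer=weight_regularizer)
bn1_var = flow.get_variable(name='bn1_var', shape=[64], initializer=weight_initializer, regularizer=weight_regularizer)
out_0 = flow.nn.batch_normalization(x=in_0, mean=bn1_mean, variance=bn1_var, offset=bn1_beta, scale=bn1_gamma, axis=1, variance_epsilon=1e-05)
```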
...
@@ -37,6 +37,7 @@
 from oneflow_onnx.x2oneflow.handler import BackendHandler
 from oneflow_onnx.x2oneflow.handlers import *
 from oneflow_onnx.onnx_wrapper import Node as OnnxNode
+from oneflow_onnx.x2oneflow.handler import oneflow_code_gen, oneflow_blobname_map
 import io
 import tempfile
 import os
...
@@ -63,6 +64,8 @@ logger = logging.getLogger(__name__)
 def from_onnx(
     onnx_model: onnx.ModelProto, inputs, model_weight_dir="/tmp/tmp", do_onnxsim=True, from_tf2=False, from_paddle=False, from_pytorch=False,
 ):
+    oneflow_code_gen = []
+    oneflow_blobname_map = dict()
     input_names = [x.name for x in onnx_model.graph.input]
     if type(inputs) is not dict:
         assert (
...
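One caveat worth flagging: inside `from_onnx`, the assignments `oneflow_code_gen = []` and `oneflow_blobname_map = dict()` bind new local names rather than clearing the module-level containers imported from `handler`, so state from an earlier conversion survives in the objects the handlers actually append to. If the intent is a per-call reset, mutating the imported objects in place would achieve it; a minimal sketch, assuming that intent:

```python
# Sketch only: reset the shared module-level code-gen state in place.
oneflow_code_gen.clear()       # clears the imported list itself
oneflow_blobname_map.clear()   # clears the imported dict itself
```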