Commit c1d7dbf3 authored by wanghaoshuang

Remove dependency for paddle

Parent 135bd582
@@ -13,11 +13,20 @@
# limitations under the License.
from __future__ import absolute_import
from paddleslim import models
from paddleslim import prune
from paddleslim import nas
from paddleslim import analysis
__all__ = []
try:
from paddleslim import models
from paddleslim import prune
from paddleslim import nas
from paddleslim import analysis
from paddleslim import quant
from paddleslim import pantheon
__all__ += ['models', 'prune', 'nas', 'analysis', 'quant', 'pantheon']
except ImportError:
    print("PaddlePaddle is not installed in your environment, "
          "so some APIs are unavailable.")
from paddleslim import dist
from paddleslim import quant
from paddleslim import pantheon
__all__ = ['models', 'prune', 'nas', 'analysis', 'dist', 'quant', 'pantheon']
__all__ += ['dist']
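With the guarded imports above, importing paddleslim now succeeds even when PaddlePaddle itself is missing, and only the paddle-free dist API is exported. A minimal sketch of the resulting behavior (hypothetical session; assumes paddleslim is installed without paddle):

# Hypothetical session with PaddlePaddle absent.
import paddleslim
# The except-branch above ran, so only the paddle-free API was exported.
print(paddleslim.__all__)   # -> ['dist']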
@@ -12,5 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .single_distiller import merge, fsp_loss, l2_loss, soft_label_loss, loss
try:
from .single_distiller import merge, fsp_loss, l2_loss, soft_label_loss, loss
except ImportError:
print("Paddle is not installed.")
from .dml import DML
@@ -15,111 +15,27 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import paddle.fluid as fluid
class DML(fluid.dygraph.Layer):
def __init__(self, model, use_parallel):
super(DML, self).__init__()
self.model = model
self.use_parallel = use_parallel
self.model_num = len(self.model)
if self.use_parallel:
strategy = fluid.dygraph.parallel.prepare_context()
self.model = [
fluid.dygraph.parallel.DataParallel(m, strategy)
for m in self.model
]
def full_name(self):
return [m.full_name() for m in self.model]
def forward(self, input):
return [m(input) for m in self.model]
def opt(self, optimizer):
assert len(
optimizer
) == self.model_num, "The number of optimizers must match the number of models"
optimizer = DMLOptimizers(self.model, optimizer, self.use_parallel)
return optimizer
def ce_loss(self, logits, labels):
assert len(
logits
) == self.model_num, "The number of logits must match the number of models"
ce_losses = []
for i in range(self.model_num):
ce_losses.append(
fluid.layers.mean(
fluid.layers.softmax_with_cross_entropy(logits[i],
labels)))
return ce_losses
def kl_loss(self, logits):
assert len(
logits
) == self.model_num, "The number of logits must match the number of models"
if self.model_num == 1:
return []
kl_losses = []
for i in range(self.model_num):
cur_kl_loss = 0
for j in range(self.model_num):
if i != j:
x = fluid.layers.log_softmax(logits[i], axis=1)
y = fluid.layers.softmax(logits[j], axis=1)
cur_kl_loss += fluid.layers.kldiv_loss(
x, y, reduction='batchmean')
kl_losses.append(cur_kl_loss / (self.model_num - 1))
return kl_losses
def loss(self, logits, labels):
gt_losses = self.ce_loss(logits, labels)
kl_losses = self.kl_loss(logits)
if self.model_num > 1:
return [a + b for a, b in zip(gt_losses, kl_losses)]
else:
return gt_losses
def acc(self, logits, labels, k):
accs = [
fluid.layers.accuracy(
input=l, label=labels, k=k) for l in logits
]
return accs
def train(self):
for m in self.model:
m.train()
def eval(self):
for m in self.model:
m.eval()
class DMLOptimizers(object):
def __init__(self, model, optimizer, use_parallel):
self.model = model
self.optimizer = optimizer
self.use_parallel = use_parallel
def minimize(self, losses):
assert len(losses) == len(
self.optimizer
), "The number of losses must match the number of optimizers"
for i in range(len(losses)):
if self.use_parallel:
losses[i] = self.model[i].scale_loss(losses[i])
losses[i].backward()
self.model[i].apply_collective_grads()
else:
losses[i].backward()
self.optimizer[i].minimize(losses[i])
self.model[i].clear_gradients()
def get_lr(self):
current_step_lr = [opt.current_step_lr() for opt in self.optimizer]
return current_step_lr
support_pd = False
support_torch = False
try:
    import torch.nn as nn
    from .torch_dml import TORCH_DML
    support_torch = True
except ImportError:
    pass
try:
import paddle.fluid as fluid
from .pd_dml import PD_DML
support_pd = True
except ImportError:
    pass  # PaddlePaddle is absent; fall back to the torch path if available.
def DML(model):
    """Dispatch to the DML implementation matching the model's framework."""
if support_torch and isinstance(model, nn.Module):
return TORCH_DML(model)
elif support_pd and isinstance(model, fluid.dygraph.Layer):
        return PD_DML(model)
else:
print("Please install paddlepaddle or pytorch.")
@@ -11,9 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from .util import image_classification
from .slimfacenet import SlimFaceNet_A_x0_60, SlimFaceNet_B_x0_75, SlimFaceNet_C_x0_75
from .slim_mobilenet import SlimMobileNet_v1, SlimMobileNet_v2, SlimMobileNet_v3, SlimMobileNet_v4, SlimMobileNet_v5
__all__ = ["image_classification"]
from __future__ import absolute_import
from .mobilenet import MobileNet
from .resnet import ResNet34, ResNet50
from .mobilenet_v2 import MobileNetV2
__all__ = ["model_list", "MobileNet", "ResNet34", "ResNet50", "MobileNetV2"]
model_list = ['MobileNet', 'ResNet34', 'ResNet50', 'MobileNetV2']
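The model_list constant above is what util.image_classification later validates its model argument against; a quick sanity sketch (illustrative only):

# Every exported constructor should be registered in model_list.
for name in ['MobileNet', 'ResNet34', 'ResNet50', 'MobileNetV2']:
    assert name in model_list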
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = ['MobileNet']
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [10, 16, 30],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class MobileNet():
def __init__(self):
self.params = train_parameters
def net(self, input, class_dim=1000, scale=1.0):
# conv1: 112x112
input = self.conv_bn_layer(
input,
filter_size=3,
channels=3,
num_filters=int(32 * scale),
stride=2,
padding=1,
name="conv1")
# 56x56
input = self.depthwise_separable(
input,
num_filters1=32,
num_filters2=64,
num_groups=32,
stride=1,
scale=scale,
name="conv2_1")
input = self.depthwise_separable(
input,
num_filters1=64,
num_filters2=128,
num_groups=64,
stride=2,
scale=scale,
name="conv2_2")
# 28x28
input = self.depthwise_separable(
input,
num_filters1=128,
num_filters2=128,
num_groups=128,
stride=1,
scale=scale,
name="conv3_1")
input = self.depthwise_separable(
input,
num_filters1=128,
num_filters2=256,
num_groups=128,
stride=2,
scale=scale,
name="conv3_2")
# 14x14
input = self.depthwise_separable(
input,
num_filters1=256,
num_filters2=256,
num_groups=256,
stride=1,
scale=scale,
name="conv4_1")
input = self.depthwise_separable(
input,
num_filters1=256,
num_filters2=512,
num_groups=256,
stride=2,
scale=scale,
name="conv4_2")
# 14x14
for i in range(5):
input = self.depthwise_separable(
input,
num_filters1=512,
num_filters2=512,
num_groups=512,
stride=1,
scale=scale,
name="conv5" + "_" + str(i + 1))
# 7x7
input = self.depthwise_separable(
input,
num_filters1=512,
num_filters2=1024,
num_groups=512,
stride=2,
scale=scale,
name="conv5_6")
input = self.depthwise_separable(
input,
num_filters1=1024,
num_filters2=1024,
num_groups=1024,
stride=1,
scale=scale,
name="conv6")
input = fluid.layers.pool2d(
input=input,
pool_size=0,
pool_stride=1,
pool_type='avg',
global_pooling=True)
output = fluid.layers.fc(input=input,
size=class_dim,
act='softmax',
param_attr=ParamAttr(
initializer=MSRA(), name="fc7_weights"),
bias_attr=ParamAttr(name="fc7_offset"))
return output
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
act='relu',
use_cudnn=True,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(
initializer=MSRA(), name=name + "_weights"),
bias_attr=False)
bn_name = name + "_bn"
return fluid.layers.batch_norm(
input=conv,
act=act,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def depthwise_separable(self,
input,
num_filters1,
num_filters2,
num_groups,
stride,
scale,
name=None):
depthwise_conv = self.conv_bn_layer(
input=input,
filter_size=3,
num_filters=int(num_filters1 * scale),
stride=stride,
padding=1,
num_groups=int(num_groups * scale),
use_cudnn=False,
name=name + "_dw")
pointwise_conv = self.conv_bn_layer(
input=depthwise_conv,
filter_size=1,
num_filters=int(num_filters2 * scale),
stride=1,
padding=0,
name=name + "_sep")
return pointwise_conv
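The depthwise_separable block above is why MobileNet is cheap: a k x k depthwise convolution followed by a 1x1 pointwise convolution needs far fewer weights than one dense k x k convolution. A back-of-envelope sketch (bias-free, BatchNorm parameters ignored):

def sep_params(c_in, c_out, k=3):
    return k * k * c_in + c_in * c_out      # depthwise + pointwise

def std_params(c_in, c_out, k=3):
    return k * k * c_in * c_out             # dense convolution

print(std_params(256, 512) / sep_params(256, 512))   # ~8.8x fewer weights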
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = [
    'MobileNetV2', 'MobileNetV2_x0_25',
    'MobileNetV2_x0_5', 'MobileNetV2_x1_0', 'MobileNetV2_x1_5',
    'MobileNetV2_x2_0', 'MobileNetV2_scale'
]
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class MobileNetV2():
def __init__(self, scale=1.0, change_depth=False):
self.params = train_parameters
self.scale = scale
self.change_depth = change_depth
def net(self, input, class_dim=1000):
scale = self.scale
change_depth = self.change_depth
        # if change_depth is True, the network is roughly 1.4x deeper than the default.
bottleneck_params_list = [
(1, 16, 1, 1),
(6, 24, 2, 2),
(6, 32, 3, 2),
(6, 64, 4, 2),
(6, 96, 3, 1),
(6, 160, 3, 2),
(6, 320, 1, 1),
        ] if not change_depth else [
(1, 16, 1, 1),
(6, 24, 2, 2),
(6, 32, 5, 2),
(6, 64, 7, 2),
(6, 96, 5, 1),
(6, 160, 3, 2),
(6, 320, 1, 1),
]
#conv1
input = self.conv_bn_layer(
input,
num_filters=int(32 * scale),
filter_size=3,
stride=2,
padding=1,
if_act=True,
name='conv1_1')
# bottleneck sequences
i = 1
in_c = int(32 * scale)
for layer_setting in bottleneck_params_list:
t, c, n, s = layer_setting
i += 1
input = self.invresi_blocks(
input=input,
in_c=in_c,
t=t,
c=int(c * scale),
n=n,
s=s,
name='conv' + str(i))
in_c = int(c * scale)
#last_conv
input = self.conv_bn_layer(
input=input,
num_filters=int(1280 * scale) if scale > 1.0 else 1280,
filter_size=1,
stride=1,
padding=0,
if_act=True,
name='conv9')
input = fluid.layers.pool2d(
input=input,
pool_size=7,
pool_stride=1,
pool_type='avg',
global_pooling=True)
output = fluid.layers.fc(input=input,
size=class_dim,
act='softmax',
param_attr=ParamAttr(name='fc10_weights'),
bias_attr=ParamAttr(name='fc10_offset'))
return output
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
if_act=True,
name=None,
use_cudnn=True):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
bn = fluid.layers.batch_norm(
input=conv,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
if if_act:
return fluid.layers.relu6(bn)
else:
return bn
def shortcut(self, input, data_residual):
return fluid.layers.elementwise_add(input, data_residual)
def inverted_residual_unit(self,
input,
num_in_filter,
num_filters,
ifshortcut,
stride,
filter_size,
padding,
expansion_factor,
name=None):
num_expfilter = int(round(num_in_filter * expansion_factor))
channel_expand = self.conv_bn_layer(
input=input,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name=name + '_expand')
bottleneck_conv = self.conv_bn_layer(
input=channel_expand,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=padding,
num_groups=num_expfilter,
if_act=True,
name=name + '_dwise',
use_cudnn=False)
linear_out = self.conv_bn_layer(
input=bottleneck_conv,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=False,
name=name + '_linear')
if ifshortcut:
out = self.shortcut(input=input, data_residual=linear_out)
return out
else:
return linear_out
def invresi_blocks(self, input, in_c, t, c, n, s, name=None):
first_block = self.inverted_residual_unit(
input=input,
num_in_filter=in_c,
num_filters=c,
ifshortcut=False,
stride=s,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + '_1')
last_residual_block = first_block
last_c = c
for i in range(1, n):
last_residual_block = self.inverted_residual_unit(
input=last_residual_block,
num_in_filter=last_c,
num_filters=c,
ifshortcut=True,
stride=1,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + '_' + str(i + 1))
return last_residual_block
def MobileNetV2_x0_25():
model = MobileNetV2(scale=0.25)
return model
def MobileNetV2_x0_5():
model = MobileNetV2(scale=0.5)
return model
def MobileNetV2_x1_0():
model = MobileNetV2(scale=1.0)
return model
def MobileNetV2_x1_5():
model = MobileNetV2(scale=1.5)
return model
def MobileNetV2_x2_0():
model = MobileNetV2(scale=2.0)
return model
def MobileNetV2_scale():
model = MobileNetV2(scale=1.2, change_depth=True)
return model
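A usage sketch for the factory functions above (assumes a paddle.fluid 1.x environment; illustrative only):

import paddle.fluid as fluid

image = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32')
probs = MobileNetV2_x0_5().net(image, class_dim=1000)   # softmax output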
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import math
from paddle.fluid.param_attr import ParamAttr
__all__ = ["ResNet", "ResNet34", "ResNet50", "ResNet101", "ResNet152"]
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [10, 16, 30],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class ResNet():
def __init__(self, layers=50, prefix_name=''):
self.params = train_parameters
self.layers = layers
self.prefix_name = prefix_name
def net(self, input, class_dim=1000, conv1_name='conv1', fc_name=None):
layers = self.layers
        prefix_name = self.prefix_name if self.prefix_name == '' else self.prefix_name + '_'
supported_layers = [34, 50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 34 or layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_filters = [64, 128, 256, 512]
# TODO(wanghaoshuang@baidu.com):
# fix name("conv1") conflict between student and teacher in distillation.
conv = self.conv_bn_layer(
input=input,
num_filters=64,
filter_size=7,
stride=2,
act='relu',
name=prefix_name + conv1_name)
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
if layers >= 50:
for block in range(len(depth)):
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
conv_name = prefix_name + conv_name
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
fc_name = fc_name if fc_name is None else prefix_name + fc_name
out = fluid.layers.fc(input=pool,
size=class_dim,
act='softmax',
name=fc_name,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(
-stdv, stdv)))
else:
for block in range(len(depth)):
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
conv_name = prefix_name + conv_name
conv = self.basic_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
is_first=block == i == 0,
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
fc_name = fc_name if fc_name is None else prefix_name + fc_name
out = fluid.layers.fc(
input=pool,
size=class_dim,
act='softmax',
name=fc_name,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)))
return out
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False,
name=name + '.conv2d.output.1')
if self.prefix_name == '':
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
else:
if name.split("_")[1] == "conv1":
bn_name = name.split("_", 1)[0] + "_bn_" + name.split("_",
1)[1]
else:
bn_name = name.split("_", 1)[0] + "_bn" + name.split("_",
1)[1][3:]
return fluid.layers.batch_norm(
input=conv,
act=act,
name=bn_name + '.output.1',
param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(name=bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance', )
def shortcut(self, input, ch_out, stride, is_first, name):
ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1 or is_first:
return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
else:
return input
def bottleneck_block(self, input, num_filters, stride, name):
conv0 = self.conv_bn_layer(
input=input,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
conv2 = self.conv_bn_layer(
input=conv1,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_branch2c")
short = self.shortcut(
input,
num_filters * 4,
stride,
is_first=False,
name=name + "_branch1")
return fluid.layers.elementwise_add(
x=short, y=conv2, act='relu', name=name + ".add.output.5")
def basic_block(self, input, num_filters, stride, is_first, name):
conv0 = self.conv_bn_layer(
input=input,
num_filters=num_filters,
filter_size=3,
act='relu',
stride=stride,
name=name + "_branch2a")
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
act=None,
name=name + "_branch2b")
short = self.shortcut(
input, num_filters, stride, is_first, name=name + "_branch1")
return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
def ResNet34(prefix_name=''):
model = ResNet(layers=34, prefix_name=prefix_name)
return model
def ResNet50(prefix_name=''):
model = ResNet(layers=50, prefix_name=prefix_name)
return model
def ResNet101():
model = ResNet(layers=101)
return model
def ResNet152():
model = ResNet(layers=152)
return model
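As the TODO in net() notes, building a student and a teacher in one program can collide on parameter names; the prefix_name argument is the workaround. A hedged sketch (assumes paddle.fluid 1.x):

import paddle.fluid as fluid

image = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32')
student = ResNet34(prefix_name='student').net(image, class_dim=1000)
teacher = ResNet50(prefix_name='teacher').net(image, class_dim=1000)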
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = [
'SlimMobileNet_v1', 'SlimMobileNet_v2', 'SlimMobileNet_v3',
'SlimMobileNet_v4', 'SlimMobileNet_v5'
]
class SlimMobileNet():
    def __init__(self, scale=1.0, model_name='large', token=()):
assert len(token) >= 45
self.kernel_size_lis = token[:20]
self.exp_lis = token[20:40]
self.depth_lis = token[40:45]
self.scale = scale
self.inplanes = 16
if model_name == "large":
self.cfg_channel = [16, 24, 40, 80, 112, 160]
self.cfg_stride = [1, 2, 2, 2, 1, 2]
self.cfg_se = [False, False, True, False, True, True]
self.cfg_act = [
'relu', 'relu', 'relu', 'hard_swish', 'hard_swish',
'hard_swish'
]
self.cls_ch_squeeze = 960
self.cls_ch_expand = 1280
else:
raise NotImplementedError("mode[" + model_name +
"_model] is not implemented!")
def net(self, input, class_dim=1000):
scale = self.scale
inplanes = self.inplanes
kernel_size_lis = self.kernel_size_lis
exp_lis = self.exp_lis
depth_lis = self.depth_lis
cfg_channel = self.cfg_channel
cfg_stride = self.cfg_stride
cfg_se = self.cfg_se
cfg_act = self.cfg_act
cls_ch_squeeze = self.cls_ch_squeeze
cls_ch_expand = self.cls_ch_expand
#conv1
conv = self.conv_bn_layer(
input,
filter_size=3,
num_filters=self.make_divisible(inplanes * scale),
stride=2,
padding=1,
num_groups=1,
if_act=True,
act='hard_swish',
name='conv1')
inplanes = self.make_divisible(inplanes * scale)
#conv2
num_mid_filter = self.make_divisible(scale * inplanes)
_num_out_filter = cfg_channel[0]
num_out_filter = self.make_divisible(scale * _num_out_filter)
conv = self.residual_unit(
input=conv,
num_in_filter=inplanes,
num_mid_filter=num_mid_filter,
num_out_filter=num_out_filter,
act=cfg_act[0],
stride=cfg_stride[0],
filter_size=3,
use_se=cfg_se[0],
name='conv2',
short=True)
inplanes = self.make_divisible(scale * cfg_channel[0])
i = 3
for depth_id in range(len(depth_lis)):
for repeat_time in range(depth_lis[depth_id]):
num_mid_filter = self.make_divisible(
scale * _num_out_filter *
exp_lis[depth_id * 4 + repeat_time])
_num_out_filter = cfg_channel[depth_id + 1]
num_out_filter = self.make_divisible(scale * _num_out_filter)
stride = cfg_stride[depth_id + 1] if repeat_time == 0 else 1
conv = self.residual_unit(
input=conv,
num_in_filter=inplanes,
num_mid_filter=num_mid_filter,
num_out_filter=num_out_filter,
act=cfg_act[depth_id + 1],
stride=stride,
filter_size=kernel_size_lis[depth_id * 4 + repeat_time],
use_se=cfg_se[depth_id + 1],
name='conv' + str(i))
inplanes = self.make_divisible(scale *
cfg_channel[depth_id + 1])
i += 1
conv = self.conv_bn_layer(
input=conv,
filter_size=1,
num_filters=self.make_divisible(scale * cls_ch_squeeze),
stride=1,
padding=0,
num_groups=1,
if_act=True,
act='hard_swish',
name='conv_last')
conv = fluid.layers.pool2d(
input=conv, pool_type='avg', global_pooling=True, use_cudnn=False)
conv = fluid.layers.conv2d(
input=conv,
num_filters=cls_ch_expand,
filter_size=1,
stride=1,
padding=0,
act=None,
param_attr=ParamAttr(name='last_1x1_conv_weights'),
bias_attr=False)
conv = fluid.layers.hard_swish(conv)
drop = fluid.layers.dropout(x=conv, dropout_prob=0.2)
out = fluid.layers.fc(input=drop,
size=class_dim,
param_attr=ParamAttr(name='fc_weights'),
bias_attr=ParamAttr(name='fc_offset'))
return out
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
num_groups=1,
if_act=True,
act=None,
name=None,
use_cudnn=True,
res_last_bn_init=False):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
bn = fluid.layers.batch_norm(
input=conv,
param_attr=ParamAttr(
name=bn_name + "_scale",
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)),
bias_attr=ParamAttr(
name=bn_name + "_offset",
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
if if_act:
if act == 'relu':
bn = fluid.layers.relu(bn)
elif act == 'hard_swish':
bn = fluid.layers.hard_swish(bn)
return bn
def make_divisible(self, v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
def se_block(self, input, num_out_filter, ratio=4, name=None):
num_mid_filter = num_out_filter // ratio
pool = fluid.layers.pool2d(
input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
conv1 = fluid.layers.conv2d(
input=pool,
filter_size=1,
num_filters=num_mid_filter,
act='relu',
param_attr=ParamAttr(name=name + '_1_weights'),
bias_attr=ParamAttr(name=name + '_1_offset'))
conv2 = fluid.layers.conv2d(
input=conv1,
filter_size=1,
num_filters=num_out_filter,
act='hard_sigmoid',
param_attr=ParamAttr(name=name + '_2_weights'),
bias_attr=ParamAttr(name=name + '_2_offset'))
scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
return scale
def residual_unit(self,
input,
num_in_filter,
num_mid_filter,
num_out_filter,
stride,
filter_size,
act=None,
use_se=False,
name=None,
short=False):
if not short:
conv0 = self.conv_bn_layer(
input=input,
filter_size=1,
num_filters=num_mid_filter,
stride=1,
padding=0,
if_act=True,
act=act,
name=name + '_expand')
else:
conv0 = input
conv1 = self.conv_bn_layer(
input=conv0,
filter_size=filter_size,
num_filters=num_mid_filter,
stride=stride,
padding=int((filter_size - 1) // 2),
if_act=True,
act=act,
num_groups=num_mid_filter,
use_cudnn=False,
name=name + '_depthwise')
if use_se:
conv1 = self.se_block(
input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
conv2 = self.conv_bn_layer(
input=conv1,
filter_size=1,
num_filters=num_out_filter,
stride=1,
padding=0,
if_act=False,
name=name + '_linear',
res_last_bn_init=True)
if num_in_filter != num_out_filter or stride != 1:
return conv2
else:
return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
def SlimMobileNet_v1(token):
token = [
5, 3, 3, 7, 3, 3, 5, 7, 3, 3, 3, 3, 3, 3, 7, 3, 5, 3, 3, 3, 3, 3, 3, 6,
3, 3, 3, 3, 4, 4, 4, 6, 4, 3, 4, 3, 6, 4, 3, 3, 2, 2, 2, 2, 4
]
model = SlimMobileNet(model_name='large', scale=1.0, token=token)
return model
def SlimMobileNet_v2(token):
token = [
5, 3, 5, 7, 3, 3, 7, 3, 5, 3, 3, 7, 3, 3, 3, 5, 5, 5, 3, 3, 3, 3, 4, 6,
3, 3, 6, 3, 4, 4, 3, 4, 4, 4, 3, 6, 6, 4, 3, 3, 2, 2, 3, 2, 4
]
model = SlimMobileNet(model_name='large', scale=1.0, token=token)
return model
def SlimMobileNet_v3(token):
token = [
3, 3, 3, 3, 5, 3, 7, 7, 7, 3, 3, 7, 5, 3, 5, 7, 5, 3, 3, 3, 3, 3, 3, 3,
3, 4, 3, 4, 3, 6, 4, 4, 4, 4, 6, 3, 6, 4, 6, 3, 2, 2, 3, 2, 4
]
model = SlimMobileNet(model_name='large', scale=1.0, token=token)
return model
def SlimMobileNet_v4(token):
token = [
3, 3, 3, 3, 5, 3, 3, 5, 7, 3, 5, 5, 5, 3, 3, 7, 3, 5, 3, 3, 3, 3, 4, 6,
3, 4, 4, 6, 4, 6, 4, 6, 4, 6, 4, 4, 6, 6, 6, 4, 2, 3, 3, 3, 4
]
model = SlimMobileNet(model_name='large', scale=1.0, token=token)
return model
def SlimMobileNet_v5(token):
token = [
7, 7, 3, 5, 7, 3, 5, 3, 7, 5, 3, 3, 5, 3, 7, 5, 7, 7, 5, 3, 3, 3, 6, 3,
4, 6, 3, 6, 6, 3, 6, 4, 6, 6, 4, 3, 6, 6, 6, 6, 4, 4, 4, 4, 4
]
model = SlimMobileNet(model_name='large', scale=1.0, token=token)
return model
if __name__ == "__main__":
pass
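make_divisible above implements the usual MobileNet channel-rounding rule: round to the nearest multiple of 8, never dropping below 90% of the requested width. A standalone check (mirrors the method; illustrative only):

def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

print([make_divisible(16 * s) for s in (0.5, 0.75, 1.0, 1.25)])   # [8, 16, 16, 24]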
# ================================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import datetime
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
class SlimFaceNet():
def __init__(self, class_dim, scale=0.6, arch=None):
assert arch is not None
self.arch = arch
self.class_dim = class_dim
kernels = [3]
expansions = [2, 4, 6]
SE = [0, 1]
self.table = []
for k in kernels:
for e in expansions:
for se in SE:
self.table.append((k, e, se))
if scale == 1.0:
# 100% - channel
self.Slimfacenet_bottleneck_setting = [
# t, c , n ,s
[2, 64, 5, 2],
[4, 128, 1, 2],
[2, 128, 6, 1],
[4, 128, 1, 2],
[2, 128, 2, 1]
]
elif scale == 0.9:
# 90% - channel
self.Slimfacenet_bottleneck_setting = [
# t, c , n ,s
[2, 56, 5, 2],
[4, 116, 1, 2],
[2, 116, 6, 1],
[4, 116, 1, 2],
[2, 116, 2, 1]
]
elif scale == 0.75:
# 75% - channel
self.Slimfacenet_bottleneck_setting = [
# t, c , n ,s
[2, 48, 5, 2],
[4, 96, 1, 2],
[2, 96, 6, 1],
[4, 96, 1, 2],
[2, 96, 2, 1]
]
elif scale == 0.6:
# 60% - channel
self.Slimfacenet_bottleneck_setting = [
# t, c , n ,s
[2, 40, 5, 2],
[4, 76, 1, 2],
[2, 76, 6, 1],
[4, 76, 1, 2],
[2, 76, 2, 1]
]
        else:
            raise ValueError('Unsupported scale: {}'.format(scale))
self.extract_feature = True
def set_extract_feature_flag(self, flag):
self.extract_feature = flag
def net(self, input, label=None):
x = self.conv_bn_layer(
input,
filter_size=3,
num_filters=64,
stride=2,
padding=1,
num_groups=1,
if_act=True,
name='conv3x3')
x = self.conv_bn_layer(
x,
filter_size=3,
num_filters=64,
stride=1,
padding=1,
num_groups=64,
if_act=True,
name='dw_conv3x3')
in_c = 64
cnt = 0
for _exp, out_c, times, _stride in self.Slimfacenet_bottleneck_setting:
for i in range(times):
stride = _stride if i == 0 else 1
filter_size, exp, se = self.table[self.arch[cnt]]
se = False if se == 0 else True
x = self.residual_unit(
x,
num_in_filter=in_c,
num_out_filter=out_c,
stride=stride,
filter_size=filter_size,
expansion_factor=exp,
use_se=se,
name='residual_unit' + str(cnt + 1))
cnt += 1
in_c = out_c
out_c = 512
x = self.conv_bn_layer(
x,
filter_size=1,
num_filters=out_c,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name='conv1x1')
x = self.conv_bn_layer(
x,
filter_size=(7, 6),
num_filters=out_c,
stride=1,
padding=0,
num_groups=out_c,
if_act=False,
name='global_dw_conv7x7')
x = fluid.layers.conv2d(
x,
num_filters=128,
filter_size=1,
stride=1,
padding=0,
groups=1,
act=None,
use_cudnn=True,
param_attr=ParamAttr(
name='linear_conv1x1_weights',
initializer=MSRA(),
regularizer=fluid.regularizer.L2Decay(4e-4)),
bias_attr=False)
bn_name = 'linear_conv1x1_bn'
x = fluid.layers.batch_norm(
x,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
x = fluid.layers.reshape(x, shape=[x.shape[0], x.shape[1]])
if self.extract_feature:
return x
out = self.arc_margin_product(
x, label, self.class_dim, s=32.0, m=0.50, mode=2)
softmax = fluid.layers.softmax(input=out)
cost = fluid.layers.cross_entropy(input=softmax, label=label)
loss = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=out, label=label, k=1)
return loss, acc
def residual_unit(self,
input,
num_in_filter,
num_out_filter,
stride,
filter_size,
expansion_factor,
use_se=False,
name=None):
num_expfilter = int(round(num_in_filter * expansion_factor))
input_data = input
expand_conv = self.conv_bn_layer(
input=input,
filter_size=1,
num_filters=num_expfilter,
stride=1,
padding=0,
if_act=True,
name=name + '_expand')
depthwise_conv = self.conv_bn_layer(
input=expand_conv,
filter_size=filter_size,
num_filters=num_expfilter,
stride=stride,
padding=int((filter_size - 1) // 2),
if_act=True,
num_groups=num_expfilter,
use_cudnn=True,
name=name + '_depthwise')
if use_se:
depthwise_conv = self.se_block(
input=depthwise_conv,
num_out_filter=num_expfilter,
name=name + '_se')
linear_conv = self.conv_bn_layer(
input=depthwise_conv,
filter_size=1,
num_filters=num_out_filter,
stride=1,
padding=0,
if_act=False,
name=name + '_linear')
if num_in_filter != num_out_filter or stride != 1:
return linear_conv
else:
return fluid.layers.elementwise_add(
x=input_data, y=linear_conv, act=None)
def se_block(self, input, num_out_filter, ratio=4, name=None):
num_mid_filter = int(num_out_filter // ratio)
pool = fluid.layers.pool2d(
input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
conv1 = fluid.layers.conv2d(
input=pool,
filter_size=1,
num_filters=num_mid_filter,
act=None,
param_attr=ParamAttr(name=name + '_1_weights'),
bias_attr=ParamAttr(name=name + '_1_offset'))
conv1 = fluid.layers.prelu(
conv1,
mode='channel',
param_attr=ParamAttr(
name=name + '_prelu',
regularizer=fluid.regularizer.L2Decay(0.0)))
conv2 = fluid.layers.conv2d(
input=conv1,
filter_size=1,
num_filters=num_out_filter,
act='hard_sigmoid',
param_attr=ParamAttr(name=name + '_2_weights'),
bias_attr=ParamAttr(name=name + '_2_offset'))
scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
return scale
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
num_groups=1,
if_act=True,
name=None,
use_cudnn=True):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(
name=name + '_weights', initializer=MSRA()),
bias_attr=False)
bn_name = name + '_bn'
bn = fluid.layers.batch_norm(
input=conv,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
if if_act:
return fluid.layers.prelu(
bn,
mode='channel',
param_attr=ParamAttr(
name=name + '_prelu',
regularizer=fluid.regularizer.L2Decay(0.0)))
else:
return bn
def arc_margin_product(self, input, label, out_dim, s=32.0, m=0.50,
mode=2):
input_norm = fluid.layers.sqrt(
fluid.layers.reduce_sum(
fluid.layers.square(input), dim=1))
input = fluid.layers.elementwise_div(input, input_norm, axis=0)
weight = fluid.layers.create_parameter(
shape=[out_dim, input.shape[1]],
dtype='float32',
name='weight_norm',
attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Xavier(),
regularizer=fluid.regularizer.L2Decay(4e-4)))
weight_norm = fluid.layers.sqrt(
fluid.layers.reduce_sum(
fluid.layers.square(weight), dim=1))
weight = fluid.layers.elementwise_div(weight, weight_norm, axis=0)
weight = fluid.layers.transpose(weight, perm=[1, 0])
cosine = fluid.layers.mul(input, weight)
sine = fluid.layers.sqrt(1.0 - fluid.layers.square(cosine))
cos_m = math.cos(m)
sin_m = math.sin(m)
phi = cosine * cos_m - sine * sin_m
th = math.cos(math.pi - m)
mm = math.sin(math.pi - m) * m
if mode == 1:
phi = self.paddle_where_more_than(cosine, 0, phi, cosine)
elif mode == 2:
phi = self.paddle_where_more_than(cosine, th, phi, cosine - mm)
else:
pass
one_hot = fluid.one_hot(input=label, depth=out_dim)
output = fluid.layers.elementwise_mul(
one_hot, phi) + fluid.layers.elementwise_mul(
(1.0 - one_hot), cosine)
output = output * s
return output
def paddle_where_more_than(self, target, limit, x, y):
mask = fluid.layers.cast(x=(target > limit), dtype='float32')
output = fluid.layers.elementwise_mul(
mask, x) + fluid.layers.elementwise_mul((1.0 - mask), y)
return output
def SlimFaceNet_A_x0_60(class_dim=None, scale=0.6, arch=None):
scale = 0.6
arch = [0, 1, 5, 1, 0, 2, 1, 2, 0, 1, 2, 1, 1, 0, 1]
return SlimFaceNet(class_dim=class_dim, scale=scale, arch=arch)
def SlimFaceNet_B_x0_75(class_dim=None, scale=0.6, arch=None):
scale = 0.75
arch = [1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 3, 2, 2, 3]
return SlimFaceNet(class_dim=class_dim, scale=scale, arch=arch)
def SlimFaceNet_C_x0_75(class_dim=None, scale=0.6, arch=None):
scale = 0.75
arch = [1, 3, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 5, 5, 5]
return SlimFaceNet(class_dim=class_dim, scale=scale, arch=arch)
if __name__ == "__main__":
x = fluid.data(name='x', shape=[-1, 3, 112, 112], dtype='float32')
print(x.shape)
    model = SlimFaceNet(
        class_dim=10000, arch=[1, 3, 3, 1, 1, 0, 0, 1, 0, 1, 1, 0, 5, 5, 3])
y = model.net(x)
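arc_margin_product relies on the angle-addition identity cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m), which is why it only needs the cosine and sine of the logit angle. A quick numerical check (illustrative only):

import math

theta, m = 0.8, 0.5
lhs = math.cos(theta + m)
rhs = math.cos(theta) * math.cos(m) - math.sin(theta) * math.sin(m)
assert abs(lhs - rhs) < 1e-12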
from __future__ import absolute_import
import paddle.fluid as fluid
from ..models import classification_models
__all__ = ["image_classification"]
model_list = classification_models.model_list
def image_classification(model, image_shape, class_num, use_gpu=False):
assert model in model_list
train_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
model = classification_models.__dict__[model]()
out = model.net(input=image, class_dim=class_num)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
val_program = fluid.default_main_program().clone(for_test=True)
opt = fluid.optimizer.Momentum(0.1, 0.9)
opt.minimize(avg_cost)
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
    exe.run(startup_program)
return exe, train_program, val_program, (image, label), (
acc_top1.name, acc_top5.name, avg_cost.name)
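A hypothetical call of the helper above (CPU build; paddle.fluid 1.x assumed):

exe, train_prog, val_prog, (image, label), fetch_names = image_classification(
    'MobileNet', image_shape=[3, 224, 224], class_num=1000, use_gpu=False)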