Move models of paddle to models/pd

556d9167 · wanghaoshuang · b1786496 · b1786496 · 556d9167 · 556d9167
10 changed files
--- a/demo/DML/dml_mobilenetv1_pytorch_quickstart.ipynb
+++ b/demo/DML/dml_mobilenetv1_pytorch_quickstart.ipynb
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 1. 安装依赖\n",
-    "\n",
-    "### 1.1 安装PaddleSlim\n",
-    "\n",
-    "```\n",
-    "git clone https://github.com/PaddlePaddle/PaddleSlim.git\n",
-    "cd PaddleSlim\n",
-    "python setup.py install\n",
-    "```\n",
-    "\n",
-    "### 1.2 安装pytorch\n",
-    "\n",
-    "```\n",
-    "pip install torch torchvision\n",
-    "```\n",
-    "\n",
-    "## 2. Import依赖与环境设置"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from __future__ import print_function\n",
-    "import argparse\n",
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "import torch.nn.functional as F\n",
-    "import torch.optim as optim\n",
-    "from torchvision import datasets, transforms, models\n",
-    "from torch.optim.lr_scheduler import StepLR\n",
-    "from paddleslim.dist import DML\n",
-    "\n",
-    "args = {\"batch-size\": 256,\n",
-    "        \"test-batch-size\": 256,\n",
-    "        \"epochs\": 10,\n",
-    "        \"lr\": 1.0,\n",
-    "        \"gamma\": 0.7,\n",
-    "        \"seed\": 1,\n",
-    "        \"log-interval\": 10}\n",
-    "\n",
-    "\n",
-    "\n",
-    "use_cuda = torch.cuda.is_available()\n",
-    "torch.manual_seed(args[\"seed\"])\n",
-    "device = torch.device(\"cuda\" if use_cuda else \"cpu\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 3. 准备数据\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "ImportError",
-     "evalue": "FloatProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m\u001b[0m",
-      "\u001b[0;31mImportError\u001b[0mTraceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-3-1641ec60d682>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      6\u001b[0m                        transform=transforms.Compose([\n\u001b[1;32m      7\u001b[0m                            \u001b[0mtransforms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mToTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m                            \u001b[0mtransforms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNormalize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      9\u001b[0m                        ])),\n\u001b[1;32m     10\u001b[0m         batch_size=args[\"batch_size\"], shuffle=True, **kwargs)\n",
-      "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/torchvision/datasets/cifar.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, root, train, transform, target_transform, download)\u001b[0m\n\u001b[1;32m     62\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     63\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mdownload\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdownload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     65\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     66\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_integrity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/torchvision/datasets/cifar.pyc\u001b[0m in \u001b[0;36mdownload\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    146\u001b[0m             \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Files already downloaded and verified'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    147\u001b[0m             \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 148\u001b[0;31m         \u001b[0mdownload_and_extract_archive\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mroot\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmd5\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtgz_md5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    149\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    150\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mextra_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/torchvision/datasets/utils.pyc\u001b[0m in \u001b[0;36mdownload_and_extract_archive\u001b[0;34m(url, download_root, extract_root, filename, md5, remove_finished)\u001b[0m\n\u001b[1;32m    262\u001b[0m         \u001b[0mfilename\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbasename\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    263\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 264\u001b[0;31m     \u001b[0mdownload_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdownload_root\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmd5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    266\u001b[0m     \u001b[0marchive\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdownload_root\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/torchvision/datasets/utils.pyc\u001b[0m in \u001b[0;36mdownload_url\u001b[0;34m(url, root, filename, md5)\u001b[0m\n\u001b[1;32m     83\u001b[0m             urllib.request.urlretrieve(\n\u001b[1;32m     84\u001b[0m                 \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 85\u001b[0;31m                 \u001b[0mreporthook\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgen_bar_updater\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     86\u001b[0m             )\n\u001b[1;32m     87\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0murllib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mURLError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIOError\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/torchvision/datasets/utils.pyc\u001b[0m in \u001b[0;36mgen_bar_updater\u001b[0;34m()\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     14\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mgen_bar_updater\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m     \u001b[0mpbar\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtotal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mbar_update\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mblock_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtotal_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/tqdm/notebook.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    207\u001b[0m         \u001b[0mtotal\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotal\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0munit_scale\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotal\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotal\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    208\u001b[0m         self.container = self.status_printer(\n\u001b[0;32m--> 209\u001b[0;31m             self.fp, total, self.desc, self.ncols)\n\u001b[0m\u001b[1;32m    210\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisplay\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    211\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/tqdm/notebook.pyc\u001b[0m in \u001b[0;36mstatus_printer\u001b[0;34m(_, total, desc, ncols)\u001b[0m\n\u001b[1;32m    102\u001b[0m             \u001b[0;31m# #187 #451 #558\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    103\u001b[0m             raise ImportError(\n\u001b[0;32m--> 104\u001b[0;31m                 \u001b[0;34m\"FloatProgress not found. Please update jupyter and ipywidgets.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    105\u001b[0m                 \u001b[0;34m\" See https://ipywidgets.readthedocs.io/en/stable\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    106\u001b[0m                 \"/user_install.html\")\n",
-      "\u001b[0;31mImportError\u001b[0m: FloatProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "\n",
-    "kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}\n",
-    "train_loader = torch.utils.data.DataLoader(\n",
-    "        datasets.CIFAR10('../data', train=True, download=True,\n",
-    "                       transform=transforms.Compose([\n",
-    "                           transforms.ToTensor(),\n",
-    "                           transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n",
-    "                       ])),\n",
-    "        batch_size=args[\"batch_size\"], shuffle=True, **kwargs)\n",
-    "test_loader = torch.utils.data.DataLoader(\n",
-    "        datasets.CIFAR10('../data', train=False, transform=transforms.Compose([\n",
-    "                           transforms.ToTensor(),\n",
-    "                           transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n",
-    "                       ])),\n",
-    "        batch_size=args[\"test_batch_size\"], shuffle=True, **kwargs)\n",
-    "\n",
-    "    "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 4. 定义模型"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = models.mobilenet_v2(num_classes=10).to(device)\n",
-    "optimizer = optim.Adadelta(model.parameters(), lr=args.lr)\n",
-    "scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 5. 添加DML修饰\n",
-    "### 5.1 将模型转为DML模型"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = DML(model)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 5.2 将优化器转为DML优化器"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "optimizer = model.opt(optimizer)\n",
-    "scheduler = model.lr(scheduler)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 6. 定义训练方法\n",
-    "\n",
-    "将原来的交叉熵损失替换为DML损失，代码如下："
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def train(args, model, device, train_loader, optimizer, epoch):\n",
-    "    model.train()\n",
-    "    for batch_idx, (data, target) in enumerate(train_loader):\n",
-    "        data, target = data.to(device), target.to(device)\n",
-    "        optimizer.zero_grad()\n",
-    "        output = model(data)\n",
-    "        loss = model.dml_loss(output, target)        \n",
-    "#        output = F.softmax(output, dim=1)\n",
-    "#        loss = F.cross_entropy(output, target)\n",
-    "#        loss.backward()\n",
-    "        optimizer.step()\n",
-    "        if batch_idx % args[\"log_interval\"] == 0:\n",
-    "            print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n",
-    "                epoch, batch_idx * len(data), len(train_loader.dataset),\n",
-    "                100. * batch_idx / len(train_loader), loss.item()))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 7. 定义测试方法"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "def test(model, device, test_loader):\n",
-    "    model.eval()\n",
-    "    test_loss = 0\n",
-    "    correct = 0\n",
-    "    with torch.no_grad():\n",
-    "        for data, target in test_loader:\n",
-    "            data, target = data.to(device), target.to(device)\n",
-    "            output = model(data)\n",
-    "            output = F.softmax(output, dim=1)\n",
-    "            loss = F.cross_entropy(output, target, reduction=\"sum\")\n",
-    "            test_loss += loss\n",
-    "            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability\n",
-    "            correct += pred.eq(target.view_as(pred)).sum().item()\n",
-    "\n",
-    "    test_loss /= len(test_loader.dataset)\n",
-    "\n",
-    "    print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n",
-    "        test_loss, correct, len(test_loader.dataset),\n",
-    "        100. * correct / len(test_loader.dataset))) "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 8. 开始训练"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "epochs = 10\n",
-    "for epoch in range(1, epochs + 1):\n",
-    "    train(args, model, device, train_loader, optimizer, epoch)\n",
-    "    test(model, device, test_loader)\n",
-    "    scheduler.step()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
--- a/demo/DML/dml_mobilenetv1_pytorch_quickstart.md
+++ b/demo/DML/dml_mobilenetv1_pytorch_quickstart.md
+## 1. 安装依赖
+
+### 1.1 安装PaddleSlim
+
+```
+git clone https://github.com/PaddlePaddle/PaddleSlim.git
+cd PaddleSlim
+python setup.py install
+```
+
+### 1.2 安装pytorch
+
+```
+pip install torch torchvision
+```
+
+## 2. Import依赖与环境设置
+
+
+```python
+from __future__ import print_function
+import argparse
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from torchvision import datasets, transforms, models
+from torch.optim.lr_scheduler import StepLR
+from paddleslim.dist import DML
+
+# Hyper-parameters for the tutorial, kept in a plain dict.
+# The keys use underscores because the following snippets index them as
+# args["batch_size"], args["test_batch_size"] and args["log_interval"].
+args = {"batch_size": 256,
+        "test_batch_size": 256,
+        "epochs": 10,
+        "lr": 1.0,
+        "gamma": 0.7,
+        "seed": 1,
+        "log_interval": 10}
+
+
+
+# Pick the device once and seed torch's RNG from the configured seed.
+use_cuda = torch.cuda.is_available()
+torch.manual_seed(args["seed"])
+device = torch.device("cuda" if use_cuda else "cpu")
+```
+
+## 3. 准备数据
+
+
+
+```python
+
+
+# Extra DataLoader options are only passed when CUDA is available.
+kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
+# Training split of CIFAR10 under '../data'; this is the only loader that
+# passes download=True. Images are converted to tensors and normalized with
+# mean 0.5 / std 0.5 on each of the three channels.
+train_loader = torch.utils.data.DataLoader(
+        datasets.CIFAR10('../data', train=True, download=True,
+                       transform=transforms.Compose([
+                           transforms.ToTensor(),
+                           transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+                       ])),
+        batch_size=args["batch_size"], shuffle=True, **kwargs)
+# Evaluation split (train=False) with the same preprocessing as above.
+test_loader = torch.utils.data.DataLoader(
+        datasets.CIFAR10('../data', train=False, transform=transforms.Compose([
+                           transforms.ToTensor(),
+                           transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+                       ])),
+        batch_size=args["test_batch_size"], shuffle=True, **kwargs)
+
+
+```
+
+
+## 4. 定义模型
+
+
+```python
+# `args` is a plain dict, so the learning rate and decay factor are read by
+# key; attribute access (args.lr) would raise AttributeError.
+model = models.mobilenet_v2(num_classes=10).to(device)
+optimizer = optim.Adadelta(model.parameters(), lr=args["lr"])
+scheduler = StepLR(optimizer, step_size=1, gamma=args["gamma"])
+```
+
+## 5. 添加DML修饰
+### 5.1 将模型转为DML模型
+
+
+```python
+model = DML(model)
+```
+
+### 5.2 将优化器转为DML优化器
+
+
+```python
+optimizer = model.opt(optimizer)
+scheduler = model.lr(scheduler)
+```
+
+## 6. 定义训练方法
+
+将原来的交叉熵损失替换为DML损失，代码如下：
+
+
+```python
+def train(args, model, device, train_loader, optimizer, epoch):
+    """Run one pass over ``train_loader`` and periodically print the loss.
+
+    Each batch is moved to ``device``, fed through ``model``, and the loss is
+    taken from ``model.dml_loss`` before ``optimizer.step()`` is called.
+    Progress is printed every ``args["log_interval"]`` batches; ``epoch`` is
+    only used in that message.
+    """
+    model.train()
+    for batch_idx, (data, target) in enumerate(train_loader):
+        data, target = data.to(device), target.to(device)
+        optimizer.zero_grad()
+        output = model(data)
+        loss = model.dml_loss(output, target)  
+        # The commented-out lines below are the plain cross-entropy path that
+        # the DML loss replaces.
+        # NOTE(review): no backward() call is executed in this function;
+        # presumably dml_loss or the wrapped optimizer performs it - confirm.
+#        output = F.softmax(output, dim=1)
+#        loss = F.cross_entropy(output, target)
+#        loss.backward()
+        optimizer.step()
+        if batch_idx % args["log_interval"] == 0:
+            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
+                epoch, batch_idx * len(data), len(train_loader.dataset),
+                100. * batch_idx / len(train_loader), loss.item()))
+```
+
+## 7. 定义测试方法
+
+
+```python
+
+def test(model, device, test_loader):
+    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.
+
+    Args:
+        model: Module called as ``model(data)``; its output is treated as
+            per-class logits of shape (batch, classes).
+        device: Device every batch is moved to before the forward pass.
+        test_loader: Iterable of ``(data, target)`` batches exposing a
+            ``dataset`` attribute whose length is the number of samples.
+
+    The summed cross-entropy is divided by the dataset size, and the number
+    of correct top-1 predictions is reported alongside it.
+    """
+    model.eval()
+    test_loss = 0
+    correct = 0
+    with torch.no_grad():
+        for data, target in test_loader:
+            data, target = data.to(device), target.to(device)
+            output = model(data)
+            # F.cross_entropy applies log-softmax itself, so it is given the
+            # raw logits; applying softmax first would distort the loss.
+            loss = F.cross_entropy(output, target, reduction="sum")
+            test_loss += loss.item()
+            # Softmax is monotonic, so the arg-max of the logits is the
+            # predicted class.
+            pred = output.argmax(dim=1, keepdim=True)
+            correct += pred.eq(target.view_as(pred)).sum().item()
+
+    test_loss /= len(test_loader.dataset)
+
+    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
+        test_loss, correct, len(test_loader.dataset),
+        100. * correct / len(test_loader.dataset)))
+```
+
+## 8. 开始训练
+
+
+```python
+epochs = 10
+for epoch in range(1, epochs + 1):
+    train(args, model, device, train_loader, optimizer, epoch)
+    test(model, device, test_loader)
+    scheduler.step()
+```
--- a/paddleslim/models/pd/__init__.py
+++ b/paddleslim/models/pd/__init__.py
+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from .util import image_classification
+from .slimfacenet import SlimFaceNet_A_x0_60, SlimFaceNet_B_x0_75, SlimFaceNet_C_x0_75
+from .slim_mobilenet import SlimMobileNet_v1, SlimMobileNet_v2, SlimMobileNet_v3, SlimMobileNet_v4, SlimMobileNet_v5
+__all__ = ["image_classification"]
--- a/paddleslim/models/pd/classification_models.py
+++ b/paddleslim/models/pd/classification_models.py
+from __future__ import absolute_import
+from .mobilenet import MobileNet
+from .resnet import ResNet34, ResNet50
+from .mobilenet_v2 import MobileNetV2
+__all__ = ["model_list", "MobileNet", "ResNet34", "ResNet50", "MobileNetV2"]
+model_list = ['MobileNet', 'ResNet34', 'ResNet50', 'MobileNetV2']
--- a/paddleslim/models/pd/mobilenet.py
+++ b/paddleslim/models/pd/mobilenet.py
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle.fluid as fluid
+from paddle.fluid.initializer import MSRA
+from paddle.fluid.param_attr import ParamAttr
+
+__all__ = ['MobileNet']
+
+# Default training configuration for this model; MobileNet.__init__ stores
+# this dict on each instance as ``self.params``.
+train_parameters = {
+    "input_size": [3, 224, 224],
+    "input_mean": [0.485, 0.456, 0.406],
+    "input_std": [0.229, 0.224, 0.225],
+    "learning_strategy": {
+        "name": "piecewise_decay",
+        "batch_size": 256,
+        "epochs": [10, 16, 30],
+        "steps": [0.1, 0.01, 0.001, 0.0001]
+    }
+}
+
+
+class MobileNet():
+    """MobileNet classifier assembled from ``paddle.fluid`` layers.
+
+    The graph is a strided 3x3 convolution, a sequence of depthwise-separable
+    blocks, global average pooling and a softmax fully-connected layer.
+    Every convolution, batch-norm and fc parameter gets an explicit name.
+    """
+
+    def __init__(self):
+        # Expose the module-level training configuration on the instance.
+        self.params = train_parameters
+
+    def net(self, input, class_dim=1000, scale=1.0):
+        """Build the forward graph and return the softmax output.
+
+        Args:
+            input: Image variable fed to the first convolution.
+            class_dim: Size of the final fully-connected layer.
+            scale: Multiplier applied to the filter and group counts.
+
+        Returns:
+            The output of the final ``fc`` layer (``act='softmax'``).
+        """
+        # First layer: ordinary 3x3 convolution with stride 2.
+        feat = self.conv_bn_layer(
+            input,
+            filter_size=3,
+            channels=3,
+            num_filters=int(32 * scale),
+            stride=2,
+            padding=1,
+            name="conv1")
+
+        # One row per depthwise-separable block, in execution order:
+        # (num_filters1, num_filters2, num_groups, stride, name).
+        block_specs = [
+            (32, 64, 32, 1, "conv2_1"),
+            (64, 128, 64, 2, "conv2_2"),
+            (128, 128, 128, 1, "conv3_1"),
+            (128, 256, 128, 2, "conv3_2"),
+            (256, 256, 256, 1, "conv4_1"),
+            (256, 512, 256, 2, "conv4_2"),
+        ]
+        # Five identical 512-filter blocks named conv5_1 .. conv5_5.
+        block_specs += [(512, 512, 512, 1, "conv5" + "_" + str(idx + 1))
+                        for idx in range(5)]
+        block_specs += [
+            (512, 1024, 512, 2, "conv5_6"),
+            (1024, 1024, 1024, 1, "conv6"),
+        ]
+
+        for filters_dw, filters_pw, groups, blk_stride, blk_name in block_specs:
+            feat = self.depthwise_separable(
+                feat,
+                num_filters1=filters_dw,
+                num_filters2=filters_pw,
+                num_groups=groups,
+                stride=blk_stride,
+                scale=scale,
+                name=blk_name)
+
+        # Global average pooling over the remaining spatial extent.
+        pooled = fluid.layers.pool2d(
+            input=feat,
+            pool_size=0,
+            pool_stride=1,
+            pool_type='avg',
+            global_pooling=True)
+
+        fc_weight = ParamAttr(initializer=MSRA(), name="fc7_weights")
+        fc_bias = ParamAttr(name="fc7_offset")
+        return fluid.layers.fc(input=pooled,
+                               size=class_dim,
+                               act='softmax',
+                               param_attr=fc_weight,
+                               bias_attr=fc_bias)
+
+    def conv_bn_layer(self,
+                      input,
+                      filter_size,
+                      num_filters,
+                      stride,
+                      padding,
+                      channels=None,
+                      num_groups=1,
+                      act='relu',
+                      use_cudnn=True,
+                      name=None):
+        """Apply a bias-free conv2d followed by batch normalization.
+
+        ``act`` is applied by the batch-norm layer, not by the convolution.
+        ``channels`` is accepted but not used here. ``name`` is concatenated
+        with suffixes to form the parameter names, so a string must be
+        passed even though the default is ``None``.
+        """
+        weight_attr = ParamAttr(initializer=MSRA(), name=name + "_weights")
+        conv_out = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            act=None,
+            use_cudnn=use_cudnn,
+            param_attr=weight_attr,
+            bias_attr=False)
+
+        bn_prefix = name + "_bn"
+        normed = fluid.layers.batch_norm(
+            input=conv_out,
+            act=act,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + '_mean',
+            moving_variance_name=bn_prefix + '_variance')
+        return normed
+
+    def depthwise_separable(self,
+                            input,
+                            num_filters1,
+                            num_filters2,
+                            num_groups,
+                            stride,
+                            scale,
+                            name=None):
+        """Stack a grouped 3x3 conv-bn ("_dw") and a 1x1 conv-bn ("_sep").
+
+        ``num_filters1`` and ``num_groups`` (both multiplied by ``scale``)
+        configure the 3x3 layer, which runs with ``use_cudnn=False``;
+        ``num_filters2`` (also scaled) sets the width of the 1x1 layer.
+        """
+        dw_out = self.conv_bn_layer(
+            input=input,
+            filter_size=3,
+            num_filters=int(num_filters1 * scale),
+            stride=stride,
+            padding=1,
+            num_groups=int(num_groups * scale),
+            use_cudnn=False,
+            name=name + "_dw")
+
+        return self.conv_bn_layer(
+            input=dw_out,
+            filter_size=1,
+            num_filters=int(num_filters2 * scale),
+            stride=1,
+            padding=0,
+            name=name + "_sep")
--- a/paddleslim/models/pd/mobilenet_v2.py
+++ b/paddleslim/models/pd/mobilenet_v2.py
+#Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle.fluid as fluid
+from paddle.fluid.initializer import MSRA
+from paddle.fluid.param_attr import ParamAttr
+
+# Public names of this module. Every entry must be its own string literal:
+# two adjacent literals without a comma between them are concatenated into
+# a single (non-existent) name, which breaks ``from ... import *``.
+__all__ = [
+    'MobileNetV2', 'MobileNetV2_x0_25',
+    'MobileNetV2_x0_5', 'MobileNetV2_x1_0', 'MobileNetV2_x1_5',
+    'MobileNetV2_x2_0', 'MobileNetV2_scale'
+]
+
+# Default training configuration for this model; MobileNetV2.__init__ stores
+# this dict on each instance as ``self.params``.
+train_parameters = {
+    "input_size": [3, 224, 224],
+    "input_mean": [0.485, 0.456, 0.406],
+    "input_std": [0.229, 0.224, 0.225],
+    "learning_strategy": {
+        "name": "piecewise_decay",
+        "batch_size": 256,
+        "epochs": [30, 60, 90],
+        "steps": [0.1, 0.01, 0.001, 0.0001]
+    }
+}
+
+
+class MobileNetV2():
+    def __init__(self, scale=1.0, change_depth=False):
+        """Store the settings that ``net`` reads when building the graph.
+
+        Args:
+            scale: Multiplier ``net`` applies to the layer filter counts.
+            change_depth: Selects which bottleneck table ``net`` uses; the
+                table chosen when this is not False is described there as
+                1.4 times as deep.
+        """
+        # Module-level training configuration, exposed on the instance.
+        self.params = train_parameters
+        self.scale = scale
+        self.change_depth = change_depth
+
+    def net(self, input, class_dim=1000):
+        """Build the MobileNetV2 forward graph and return the softmax output.
+
+        Args:
+            input: Image variable fed to the first convolution.
+            class_dim: Size of the final fully-connected layer.
+
+        Returns:
+            The output of the final ``fc`` layer (``act='softmax'``).
+        """
+        scale = self.scale
+
+        # Each row is (t, c, n, s) and is forwarded to ``invresi_blocks``.
+        # If change_depth is True, the new depth is 1.4 times as deep as
+        # before. The comparison stays an equality test against False so
+        # that non-bool values select the same table as they always did.
+        if self.change_depth == False:
+            bottleneck_params_list = [
+                (1, 16, 1, 1),
+                (6, 24, 2, 2),
+                (6, 32, 3, 2),
+                (6, 64, 4, 2),
+                (6, 96, 3, 1),
+                (6, 160, 3, 2),
+                (6, 320, 1, 1),
+            ]
+        else:
+            bottleneck_params_list = [
+                (1, 16, 1, 1),
+                (6, 24, 2, 2),
+                (6, 32, 5, 2),
+                (6, 64, 7, 2),
+                (6, 96, 5, 1),
+                (6, 160, 3, 2),
+                (6, 320, 1, 1),
+            ]
+
+        # conv1: 3x3 convolution with stride 2.
+        first_width = int(32 * scale)
+        feat = self.conv_bn_layer(
+            input,
+            num_filters=first_width,
+            filter_size=3,
+            stride=2,
+            padding=1,
+            if_act=True,
+            name='conv1_1')
+
+        # Bottleneck sequences are named conv2, conv3, ... in table order;
+        # each one receives the previous sequence's scaled output width.
+        in_c = first_width
+        for stage_no, (t, c, n, s) in enumerate(bottleneck_params_list, 2):
+            out_c = int(c * scale)
+            feat = self.invresi_blocks(
+                input=feat,
+                in_c=in_c,
+                t=t,
+                c=out_c,
+                n=n,
+                s=s,
+                name='conv' + str(stage_no))
+            in_c = out_c
+
+        # The last 1x1 convolution is only widened for scale > 1.0.
+        if scale > 1.0:
+            last_width = int(1280 * scale)
+        else:
+            last_width = 1280
+        feat = self.conv_bn_layer(
+            input=feat,
+            num_filters=last_width,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            if_act=True,
+            name='conv9')
+
+        pooled = fluid.layers.pool2d(
+            input=feat,
+            pool_size=7,
+            pool_stride=1,
+            pool_type='avg',
+            global_pooling=True)
+
+        return fluid.layers.fc(input=pooled,
+                               size=class_dim,
+                               act='softmax',
+                               param_attr=ParamAttr(name='fc10_weights'),
+                               bias_attr=ParamAttr(name='fc10_offset'))
+
+    def conv_bn_layer(self,
+                      input,
+                      filter_size,
+                      num_filters,
+                      stride,
+                      padding,
+                      channels=None,
+                      num_groups=1,
+                      if_act=True,
+                      name=None,
+                      use_cudnn=True):
+        """Apply a bias-free conv2d, batch norm and an optional ReLU6.
+
+        ``channels`` is accepted but not used here. ``name`` is concatenated
+        with suffixes to form the parameter names, so a string must be
+        passed even though the default is ``None``.
+
+        Returns:
+            ``relu6`` of the batch-norm output when ``if_act`` is true,
+            otherwise the batch-norm output itself.
+        """
+        weight_attr = ParamAttr(name=name + '_weights')
+        conv_out = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            act=None,
+            use_cudnn=use_cudnn,
+            param_attr=weight_attr,
+            bias_attr=False)
+
+        bn_prefix = name + '_bn'
+        normed = fluid.layers.batch_norm(
+            input=conv_out,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + '_mean',
+            moving_variance_name=bn_prefix + '_variance')
+
+        return fluid.layers.relu6(normed) if if_act else normed
+
+    def shortcut(self, input, data_residual):
+        """Return the element-wise sum of ``input`` and ``data_residual``."""
+        summed = fluid.layers.elementwise_add(input, data_residual)
+        return summed
+
+    def inverted_residual_unit(self,
+                               input,
+                               num_in_filter,
+                               num_filters,
+                               ifshortcut,
+                               stride,
+                               filter_size,
+                               padding,
+                               expansion_factor,
+                               name=None):
+        """Build one unit: 1x1 expand conv, depthwise conv, 1x1 linear conv.
+
+        The expanded width is ``round(num_in_filter * expansion_factor)``.
+        The three stages are named ``<name>_expand``, ``<name>_dwise`` and
+        ``<name>_linear``; the last one has no activation. When ``ifshortcut``
+        is true the unit's input is added to the linear output.
+        """
+        expanded_channels = int(round(num_in_filter * expansion_factor))
+
+        # 1x1 point-wise convolution that widens the feature map.
+        expanded = self.conv_bn_layer(
+            input=input,
+            num_filters=expanded_channels,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            if_act=True,
+            name=name + '_expand')
+
+        # One group per channel, i.e. a depthwise convolution.
+        depthwise = self.conv_bn_layer(
+            input=expanded,
+            num_filters=expanded_channels,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            num_groups=expanded_channels,
+            if_act=True,
+            name=name + '_dwise',
+            use_cudnn=False)
+
+        # 1x1 projection down to the requested output width, no activation.
+        projected = self.conv_bn_layer(
+            input=depthwise,
+            num_filters=num_filters,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            if_act=False,
+            name=name + '_linear')
+
+        if not ifshortcut:
+            return projected
+        return self.shortcut(input=input, data_residual=projected)
+
+    def invresi_blocks(self, input, in_c, t, c, n, s, name=None):
+        """Stack ``n`` inverted-residual units that all output ``c`` channels.
+
+        The first unit (``<name>_1``) maps ``in_c`` to ``c`` channels with
+        stride ``s`` and no shortcut. Units ``<name>_2`` .. ``<name>_<n>`` keep
+        ``c`` channels, use stride 1 and add a shortcut. ``t`` is the
+        expansion factor passed to every unit.
+        """
+        block = self.inverted_residual_unit(
+            input=input,
+            num_in_filter=in_c,
+            num_filters=c,
+            ifshortcut=False,
+            stride=s,
+            filter_size=3,
+            padding=1,
+            expansion_factor=t,
+            name=name + '_1')
+
+        for unit_idx in range(2, n + 1):
+            block = self.inverted_residual_unit(
+                input=block,
+                num_in_filter=c,
+                num_filters=c,
+                ifshortcut=True,
+                stride=1,
+                filter_size=3,
+                padding=1,
+                expansion_factor=t,
+                name=name + '_' + str(unit_idx))
+        return block
+
+
+def MobileNetV2_x0_25():
+    """Return a ``MobileNetV2`` builder constructed with ``scale=0.25``."""
+    return MobileNetV2(scale=0.25)
+
+
+def MobileNetV2_x0_5():
+    """Return a ``MobileNetV2`` builder constructed with ``scale=0.5``."""
+    return MobileNetV2(scale=0.5)
+
+
+def MobileNetV2_x1_0():
+    """Return a ``MobileNetV2`` builder constructed with ``scale=1.0``."""
+    return MobileNetV2(scale=1.0)
+
+
+def MobileNetV2_x1_5():
+    """Return a ``MobileNetV2`` builder constructed with ``scale=1.5``."""
+    return MobileNetV2(scale=1.5)
+
+
+def MobileNetV2_x2_0():
+    """Return a ``MobileNetV2`` builder constructed with ``scale=2.0``."""
+    return MobileNetV2(scale=2.0)
+
+
+def MobileNetV2_scale():
+    """Return a ``MobileNetV2`` built with ``scale=1.2, change_depth=True``."""
+    return MobileNetV2(scale=1.2, change_depth=True)
--- a/paddleslim/models/pd/resnet.py
+++ b/paddleslim/models/pd/resnet.py
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle
+import paddle.fluid as fluid
+import math
+from paddle.fluid.param_attr import ParamAttr
+
+__all__ = ["ResNet", "ResNet34", "ResNet50", "ResNet101", "ResNet152"]
+
+# Default training configuration. Every ResNet instance keeps a reference to
+# this dict as ``self.params``; nothing else in this file reads it.
+train_parameters = {
+    # presumably CHW input shape and per-channel normalization statistics;
+    # verify against the data pipeline that consumes them
+    "input_size": [3, 224, 224],
+    "input_mean": [0.485, 0.456, 0.406],
+    "input_std": [0.229, 0.224, 0.225],
+    "learning_strategy": {
+        "name": "piecewise_decay",
+        "batch_size": 256,
+        # NOTE(review): "epochs" looks like the decay boundaries and "steps"
+        # like the learning-rate value for each interval; confirm with the
+        # training script.
+        "epochs": [10, 16, 30],
+        "steps": [0.1, 0.01, 0.001, 0.0001]
+    }
+}
+
+
+class ResNet():
+    """Graph builder for ResNet-34/50/101/152 classifiers on ``paddle.fluid``.
+
+    Args:
+        layers: Network depth. ``net`` asserts it is one of 34, 50, 101, 152.
+        prefix_name: Optional string prepended (followed by ``_``) to the
+            layer and parameter names created by ``net``. The TODO in ``net``
+            says this is meant to keep student and teacher names apart in
+            distillation.
+    """
+
+    def __init__(self, layers=50, prefix_name=''):
+        self.params = train_parameters
+        self.layers = layers
+        self.prefix_name = prefix_name
+
+    def net(self, input, class_dim=1000, conv1_name='conv1', fc_name=None):
+        """Build the network on ``input`` and return the softmax output.
+
+        Depth 34 stacks ``basic_block`` units; depths 50/101/152 stack
+        ``bottleneck_block`` units. Both variants finish with global average
+        pooling and a ``class_dim``-way softmax ``fc`` layer whose weights are
+        initialised uniformly in ``[-stdv, stdv]`` with
+        ``stdv = 1 / sqrt(pooled_channels)``.
+
+        Args:
+            input: Input variable fed to the first 7x7 convolution.
+            class_dim: Size of the final ``fc`` layer.
+            conv1_name: Name of the stem convolution (before prefixing).
+            fc_name: Optional name of the final ``fc`` layer; it is prefixed
+                too when given.
+
+        Raises:
+            AssertionError: If ``self.layers`` is not a supported depth.
+        """
+        layers = self.layers
+        # Compare by value: ``is ''`` tests object identity, which only works
+        # through string interning and raises a SyntaxWarning on Python 3.8+.
+        prefix_name = '' if self.prefix_name == '' else self.prefix_name + '_'
+        supported_layers = [34, 50, 101, 152]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(supported_layers, layers)
+
+        # Number of residual blocks in each of the four stages.
+        if layers == 34 or layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        num_filters = [64, 128, 256, 512]
+
+        # TODO(wanghaoshuang@baidu.com):
+        # fix name("conv1") conflict between student and teacher in distillation.
+        conv = self.conv_bn_layer(
+            input=input,
+            num_filters=64,
+            filter_size=7,
+            stride=2,
+            act='relu',
+            name=prefix_name + conv1_name)
+        conv = fluid.layers.pool2d(
+            input=conv,
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=1,
+            pool_type='max')
+
+        if layers >= 50:
+            for block in range(len(depth)):
+                for i in range(depth[block]):
+                    # The third stage of the 101/152 variants is named
+                    # "res4a", "res4b1", "res4b2", ...; every other stage
+                    # uses consecutive letters ("res2a", "res2b", ...).
+                    if layers in [101, 152] and block == 2:
+                        if i == 0:
+                            conv_name = "res" + str(block + 2) + "a"
+                        else:
+                            conv_name = "res" + str(block + 2) + "b" + str(i)
+                    else:
+                        conv_name = "res" + str(block + 2) + chr(97 + i)
+                    conv_name = prefix_name + conv_name
+                    conv = self.bottleneck_block(
+                        input=conv,
+                        num_filters=num_filters[block],
+                        # Only the first block of stages 2-4 downsamples.
+                        stride=2 if i == 0 and block != 0 else 1,
+                        name=conv_name)
+
+            pool = fluid.layers.pool2d(
+                input=conv, pool_size=7, pool_type='avg', global_pooling=True)
+            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
+            fc_name = fc_name if fc_name is None else prefix_name + fc_name
+            out = fluid.layers.fc(input=pool,
+                                  size=class_dim,
+                                  act='softmax',
+                                  name=fc_name,
+                                  param_attr=fluid.param_attr.ParamAttr(
+                                      initializer=fluid.initializer.Uniform(
+                                          -stdv, stdv)))
+        else:
+            for block in range(len(depth)):
+                for i in range(depth[block]):
+                    conv_name = "res" + str(block + 2) + chr(97 + i)
+                    conv_name = prefix_name + conv_name
+                    conv = self.basic_block(
+                        input=conv,
+                        num_filters=num_filters[block],
+                        stride=2 if i == 0 and block != 0 else 1,
+                        # True only for the very first block of the network.
+                        is_first=block == i == 0,
+                        name=conv_name)
+
+            pool = fluid.layers.pool2d(
+                input=conv, pool_type='avg', global_pooling=True)
+            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
+            fc_name = fc_name if fc_name is None else prefix_name + fc_name
+            out = fluid.layers.fc(
+                input=pool,
+                size=class_dim,
+                act='softmax',
+                name=fc_name,
+                param_attr=fluid.param_attr.ParamAttr(
+                    initializer=fluid.initializer.Uniform(-stdv, stdv)))
+
+        return out
+
+    def conv_bn_layer(self,
+                      input,
+                      num_filters,
+                      filter_size,
+                      stride=1,
+                      groups=1,
+                      act=None,
+                      name=None):
+        """Apply a bias-free conv2d followed by batch norm with activation ``act``.
+
+        Padding is ``(filter_size - 1) // 2``. The convolution weight is named
+        ``<name>_weights``. The batch-norm parameter prefix is derived from
+        ``name``: ``conv1`` becomes ``bn_conv1`` and any other name has its
+        first three characters replaced by ``bn`` (``res2a_branch2a`` becomes
+        ``bn2a_branch2a``). With a non-empty ``self.prefix_name`` the same
+        rule is applied to the part of ``name`` after the first underscore.
+        """
+        conv = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            param_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False,
+            name=name + '.conv2d.output.1')
+        if self.prefix_name == '':
+            if name == "conv1":
+                bn_name = "bn_" + name
+            else:
+                bn_name = "bn" + name[3:]
+        else:
+            # NOTE(review): the split assumes the prefix itself contains no
+            # underscore; a prefix such as "teacher_a" would be cut at the
+            # wrong place. Confirm whether such prefixes are used.
+            if name.split("_")[1] == "conv1":
+                bn_name = name.split("_", 1)[0] + "_bn_" + name.split("_",
+                                                                      1)[1]
+            else:
+                bn_name = name.split("_", 1)[0] + "_bn" + name.split("_",
+                                                                     1)[1][3:]
+        return fluid.layers.batch_norm(
+            input=conv,
+            act=act,
+            name=bn_name + '.output.1',
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance', )
+
+    def shortcut(self, input, ch_out, stride, is_first, name):
+        """Return the residual branch input, projected by a 1x1 conv if needed.
+
+        A projection is used when the channel count changes, the stride is
+        not 1, or ``is_first`` is set; otherwise ``input`` is returned as is.
+        """
+        ch_in = input.shape[1]
+        if ch_in != ch_out or stride != 1 or is_first:
+            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+        else:
+            return input
+
+    def bottleneck_block(self, input, num_filters, stride, name):
+        """Build a 1x1 -> 3x3 -> 1x1 block with ``4 * num_filters`` outputs.
+
+        ``stride`` is applied by the 3x3 convolution. The shortcut branch is
+        added to the last convolution and the sum passes through ReLU.
+        """
+        conv0 = self.conv_bn_layer(
+            input=input,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b")
+        conv2 = self.conv_bn_layer(
+            input=conv1,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        short = self.shortcut(
+            input,
+            num_filters * 4,
+            stride,
+            is_first=False,
+            name=name + "_branch1")
+
+        return fluid.layers.elementwise_add(
+            x=short, y=conv2, act='relu', name=name + ".add.output.5")
+
+    def basic_block(self, input, num_filters, stride, is_first, name):
+        """Build a block of two 3x3 convolutions with a shortcut.
+
+        ``stride`` is applied by the first convolution. The shortcut branch is
+        added to the second convolution and the sum passes through ReLU.
+        """
+        conv0 = self.conv_bn_layer(
+            input=input,
+            num_filters=num_filters,
+            filter_size=3,
+            act='relu',
+            stride=stride,
+            name=name + "_branch2a")
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            act=None,
+            name=name + "_branch2b")
+        short = self.shortcut(
+            input, num_filters, stride, is_first, name=name + "_branch1")
+        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
+
+
+def ResNet34(prefix_name=''):
+    """Return a 34-layer ``ResNet`` builder using ``prefix_name``."""
+    return ResNet(layers=34, prefix_name=prefix_name)
+
+
+def ResNet50(prefix_name=''):
+    """Return a 50-layer ``ResNet`` builder using ``prefix_name``."""
+    return ResNet(layers=50, prefix_name=prefix_name)
+
+
+def ResNet101(prefix_name=''):
+    """Return a 101-layer ``ResNet`` builder.
+
+    ``prefix_name`` is forwarded to ``ResNet`` so this factory matches
+    ``ResNet34`` and ``ResNet50``. The default ``''`` keeps the behaviour of
+    the previous zero-argument call.
+    """
+    return ResNet(layers=101, prefix_name=prefix_name)
+
+
+def ResNet152(prefix_name=''):
+    """Return a 152-layer ``ResNet`` builder.
+
+    ``prefix_name`` is forwarded to ``ResNet`` so this factory matches
+    ``ResNet34`` and ``ResNet50``. The default ``''`` keeps the behaviour of
+    the previous zero-argument call.
+    """
+    return ResNet(layers=152, prefix_name=prefix_name)
--- a/paddleslim/models/pd/slim_mobilenet.py
+++ b/paddleslim/models/pd/slim_mobilenet.py
+#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle.fluid as fluid
+from paddle.fluid.initializer import MSRA
+from paddle.fluid.param_attr import ParamAttr
+
+__all__ = [
+    'SlimMobileNet_v1', 'SlimMobileNet_v2', 'SlimMobileNet_v3',
+    'SlimMobileNet_v4', 'SlimMobileNet_v5'
+]
+
+
+# Builds a fluid classification network whose per-block kernel sizes,
+# expansion ratios and per-stage depths are read from a flat `token` list.
+class SlimMobileNet():
+    # token layout (at least 45 entries, enforced by the assert):
+    #   token[0:20]  -> kernel size per block slot
+    #   token[20:40] -> expansion ratio per block slot
+    #   token[40:45] -> number of blocks in each of the 5 searched stages
+    # Only model_name == "large" is supported; anything else raises
+    # NotImplementedError.
+    def __init__(self, scale=1.0, model_name='large', token=[]):
+        assert len(token) >= 45
+        self.kernel_size_lis = token[:20]
+        self.exp_lis = token[20:40]
+        self.depth_lis = token[40:45]
+
+        self.scale = scale
+        self.inplanes = 16
+        if model_name == "large":
+            # Entry 0 configures the fixed "conv2" block; entries 1..5
+            # configure the five stages driven by depth_lis.
+            self.cfg_channel = [16, 24, 40, 80, 112, 160]
+            self.cfg_stride = [1, 2, 2, 2, 1, 2]
+            self.cfg_se = [False, False, True, False, True, True]
+            self.cfg_act = [
+                'relu', 'relu', 'relu', 'hard_swish', 'hard_swish',
+                'hard_swish'
+            ]
+            self.cls_ch_squeeze = 960
+            self.cls_ch_expand = 1280
+        else:
+            raise NotImplementedError("mode[" + model_name +
+                                      "_model] is not implemented!")
+
+    # Builds the graph on `input` and returns the output of the final fc
+    # layer of size `class_dim` (no softmax is applied here).
+    def net(self, input, class_dim=1000):
+        scale = self.scale
+        inplanes = self.inplanes
+
+        kernel_size_lis = self.kernel_size_lis
+        exp_lis = self.exp_lis
+        depth_lis = self.depth_lis
+        cfg_channel = self.cfg_channel
+        cfg_stride = self.cfg_stride
+        cfg_se = self.cfg_se
+        cfg_act = self.cfg_act
+
+        cls_ch_squeeze = self.cls_ch_squeeze
+        cls_ch_expand = self.cls_ch_expand
+        #conv1
+        conv = self.conv_bn_layer(
+            input,
+            filter_size=3,
+            num_filters=self.make_divisible(inplanes * scale),
+            stride=2,
+            padding=1,
+            num_groups=1,
+            if_act=True,
+            act='hard_swish',
+            name='conv1')
+        inplanes = self.make_divisible(inplanes * scale)
+
+        #conv2
+        # NOTE(review): inplanes was already scaled two lines above, so scale
+        # is applied twice here. With short=True the depthwise conv runs on
+        # the conv1 output directly, so num_mid_filter presumably has to
+        # equal inplanes; confirm this holds for scale values other than 1.0.
+        num_mid_filter = self.make_divisible(scale * inplanes)
+        _num_out_filter = cfg_channel[0]
+        num_out_filter = self.make_divisible(scale * _num_out_filter)
+        conv = self.residual_unit(
+            input=conv,
+            num_in_filter=inplanes,
+            num_mid_filter=num_mid_filter,
+            num_out_filter=num_out_filter,
+            act=cfg_act[0],
+            stride=cfg_stride[0],
+            filter_size=3,
+            use_se=cfg_se[0],
+            name='conv2',
+            short=True)
+        inplanes = self.make_divisible(scale * cfg_channel[0])
+
+        # Blocks are named conv3, conv4, ... in creation order.
+        i = 3
+        for depth_id in range(len(depth_lis)):
+            for repeat_time in range(depth_lis[depth_id]):
+                # Each stage owns 4 consecutive slots in exp_lis and
+                # kernel_size_lis.
+                # NOTE(review): a depth above 4 would read the next stage's
+                # slots (or run past the list); confirm depths are <= 4.
+                # The width is computed from the previous block's unscaled
+                # output channels (_num_out_filter is updated just below).
+                num_mid_filter = self.make_divisible(
+                    scale * _num_out_filter *
+                    exp_lis[depth_id * 4 + repeat_time])
+                _num_out_filter = cfg_channel[depth_id + 1]
+                num_out_filter = self.make_divisible(scale * _num_out_filter)
+                # Only the first block of a stage uses the configured stride.
+                stride = cfg_stride[depth_id + 1] if repeat_time == 0 else 1
+                conv = self.residual_unit(
+                    input=conv,
+                    num_in_filter=inplanes,
+                    num_mid_filter=num_mid_filter,
+                    num_out_filter=num_out_filter,
+                    act=cfg_act[depth_id + 1],
+                    stride=stride,
+                    filter_size=kernel_size_lis[depth_id * 4 + repeat_time],
+                    use_se=cfg_se[depth_id + 1],
+                    name='conv' + str(i))
+
+                inplanes = self.make_divisible(scale *
+                                               cfg_channel[depth_id + 1])
+                i += 1
+
+        # Head: 1x1 conv + bn, global average pool, bias-free 1x1 conv,
+        # hard_swish, dropout, fc.
+        conv = self.conv_bn_layer(
+            input=conv,
+            filter_size=1,
+            num_filters=self.make_divisible(scale * cls_ch_squeeze),
+            stride=1,
+            padding=0,
+            num_groups=1,
+            if_act=True,
+            act='hard_swish',
+            name='conv_last')
+        conv = fluid.layers.pool2d(
+            input=conv, pool_type='avg', global_pooling=True, use_cudnn=False)
+        # cls_ch_expand is not multiplied by scale.
+        conv = fluid.layers.conv2d(
+            input=conv,
+            num_filters=cls_ch_expand,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            act=None,
+            param_attr=ParamAttr(name='last_1x1_conv_weights'),
+            bias_attr=False)
+        conv = fluid.layers.hard_swish(conv)
+        drop = fluid.layers.dropout(x=conv, dropout_prob=0.2)
+        out = fluid.layers.fc(input=drop,
+                              size=class_dim,
+                              param_attr=ParamAttr(name='fc_weights'),
+                              bias_attr=ParamAttr(name='fc_offset'))
+        return out
+
+    # Bias-free conv2d followed by batch norm and an optional activation.
+    # Parameters are named <name>_weights and <name>_bn_{scale,offset,mean,
+    # variance}; the bn scale/offset get an L2 regularizer with coefficient
+    # 0.0. When if_act is true, only act == 'relu' or 'hard_swish' applies an
+    # activation; any other value returns the bn output unchanged.
+    # res_last_bn_init is accepted but not used in this method.
+    def conv_bn_layer(self,
+                      input,
+                      filter_size,
+                      num_filters,
+                      stride,
+                      padding,
+                      num_groups=1,
+                      if_act=True,
+                      act=None,
+                      name=None,
+                      use_cudnn=True,
+                      res_last_bn_init=False):
+        conv = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            act=None,
+            use_cudnn=use_cudnn,
+            param_attr=ParamAttr(name=name + '_weights'),
+            bias_attr=False)
+        bn_name = name + '_bn'
+        bn = fluid.layers.batch_norm(
+            input=conv,
+            param_attr=ParamAttr(
+                name=bn_name + "_scale",
+                regularizer=fluid.regularizer.L2DecayRegularizer(
+                    regularization_coeff=0.0)),
+            bias_attr=ParamAttr(
+                name=bn_name + "_offset",
+                regularizer=fluid.regularizer.L2DecayRegularizer(
+                    regularization_coeff=0.0)),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+        if if_act:
+            if act == 'relu':
+                bn = fluid.layers.relu(bn)
+            elif act == 'hard_swish':
+                bn = fluid.layers.hard_swish(bn)
+        return bn
+
+    # Rounds v to the nearest multiple of divisor (halves round up), never
+    # below min_value (which defaults to divisor). If the result is below
+    # 90% of v, one more divisor is added.
+    def make_divisible(self, v, divisor=8, min_value=None):
+        if min_value is None:
+            min_value = divisor
+        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+        if new_v < 0.9 * v:
+            new_v += divisor
+        return new_v
+
+    # Channel gating: global average pool -> 1x1 conv (relu) down to
+    # num_out_filter // ratio channels -> 1x1 conv (hard_sigmoid) back to
+    # num_out_filter channels, then multiplied into `input` with axis=0.
+    def se_block(self, input, num_out_filter, ratio=4, name=None):
+        num_mid_filter = num_out_filter // ratio
+        pool = fluid.layers.pool2d(
+            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
+        conv1 = fluid.layers.conv2d(
+            input=pool,
+            filter_size=1,
+            num_filters=num_mid_filter,
+            act='relu',
+            param_attr=ParamAttr(name=name + '_1_weights'),
+            bias_attr=ParamAttr(name=name + '_1_offset'))
+        conv2 = fluid.layers.conv2d(
+            input=conv1,
+            filter_size=1,
+            num_filters=num_out_filter,
+            act='hard_sigmoid',
+            param_attr=ParamAttr(name=name + '_2_weights'),
+            bias_attr=ParamAttr(name=name + '_2_offset'))
+        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
+        return scale
+
+    # One block: optional 1x1 expand conv (skipped when short=True), a
+    # depthwise conv (groups == num_mid_filter), an optional SE block, and a
+    # 1x1 linear conv without activation. The block input is added to the
+    # result only when num_in_filter == num_out_filter and stride == 1.
+    def residual_unit(self,
+                      input,
+                      num_in_filter,
+                      num_mid_filter,
+                      num_out_filter,
+                      stride,
+                      filter_size,
+                      act=None,
+                      use_se=False,
+                      name=None,
+                      short=False):
+
+        if not short:
+            conv0 = self.conv_bn_layer(
+                input=input,
+                filter_size=1,
+                num_filters=num_mid_filter,
+                stride=1,
+                padding=0,
+                if_act=True,
+                act=act,
+                name=name + '_expand')
+        else:
+            conv0 = input
+
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            filter_size=filter_size,
+            num_filters=num_mid_filter,
+            stride=stride,
+            padding=int((filter_size - 1) // 2),
+            if_act=True,
+            act=act,
+            num_groups=num_mid_filter,
+            use_cudnn=False,
+            name=name + '_depthwise')
+        if use_se:
+            conv1 = self.se_block(
+                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
+
+        # res_last_bn_init=True is passed, but conv_bn_layer ignores it.
+        conv2 = self.conv_bn_layer(
+            input=conv1,
+            filter_size=1,
+            num_filters=num_out_filter,
+            stride=1,
+            padding=0,
+            if_act=False,
+            name=name + '_linear',
+            res_last_bn_init=True)
+        if num_in_filter != num_out_filter or stride != 1:
+            return conv2
+        else:
+            return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
+
+
+def SlimMobileNet_v1(token=None):
+    """Return the predefined SlimMobileNet v1 ('large', scale 1.0).
+
+    ``token`` is ignored: the architecture is fixed by the 45-entry list
+    below, which ``SlimMobileNet`` splits into 20 kernel sizes, 20 expansion
+    ratios and 5 stage depths. The parameter is kept for backward
+    compatibility and now defaults to ``None`` so callers need not pass a
+    dummy value.
+    """
+    arch_token = [
+        5, 3, 3, 7, 3, 3, 5, 7, 3, 3, 3, 3, 3, 3, 7, 3, 5, 3, 3, 3, 3, 3, 3, 6,
+        3, 3, 3, 3, 4, 4, 4, 6, 4, 3, 4, 3, 6, 4, 3, 3, 2, 2, 2, 2, 4
+    ]
+    return SlimMobileNet(model_name='large', scale=1.0, token=arch_token)
+
+
+def SlimMobileNet_v2(token=None):
+    """Return the predefined SlimMobileNet v2 ('large', scale 1.0).
+
+    ``token`` is ignored: the architecture is fixed by the 45-entry list
+    below, which ``SlimMobileNet`` splits into 20 kernel sizes, 20 expansion
+    ratios and 5 stage depths. The parameter is kept for backward
+    compatibility and now defaults to ``None`` so callers need not pass a
+    dummy value.
+    """
+    arch_token = [
+        5, 3, 5, 7, 3, 3, 7, 3, 5, 3, 3, 7, 3, 3, 3, 5, 5, 5, 3, 3, 3, 3, 4, 6,
+        3, 3, 6, 3, 4, 4, 3, 4, 4, 4, 3, 6, 6, 4, 3, 3, 2, 2, 3, 2, 4
+    ]
+    return SlimMobileNet(model_name='large', scale=1.0, token=arch_token)
+
+
+def SlimMobileNet_v3(token=None):
+    """Return the predefined SlimMobileNet v3 ('large', scale 1.0).
+
+    ``token`` is ignored: the architecture is fixed by the 45-entry list
+    below, which ``SlimMobileNet`` splits into 20 kernel sizes, 20 expansion
+    ratios and 5 stage depths. The parameter is kept for backward
+    compatibility and now defaults to ``None`` so callers need not pass a
+    dummy value.
+    """
+    arch_token = [
+        3, 3, 3, 3, 5, 3, 7, 7, 7, 3, 3, 7, 5, 3, 5, 7, 5, 3, 3, 3, 3, 3, 3, 3,
+        3, 4, 3, 4, 3, 6, 4, 4, 4, 4, 6, 3, 6, 4, 6, 3, 2, 2, 3, 2, 4
+    ]
+    return SlimMobileNet(model_name='large', scale=1.0, token=arch_token)
+
+
+def SlimMobileNet_v4(token=None):
+    """Return the predefined SlimMobileNet v4 ('large', scale 1.0).
+
+    ``token`` is ignored: the architecture is fixed by the 45-entry list
+    below, which ``SlimMobileNet`` splits into 20 kernel sizes, 20 expansion
+    ratios and 5 stage depths. The parameter is kept for backward
+    compatibility and now defaults to ``None`` so callers need not pass a
+    dummy value.
+    """
+    arch_token = [
+        3, 3, 3, 3, 5, 3, 3, 5, 7, 3, 5, 5, 5, 3, 3, 7, 3, 5, 3, 3, 3, 3, 4, 6,
+        3, 4, 4, 6, 4, 6, 4, 6, 4, 6, 4, 4, 6, 6, 6, 4, 2, 3, 3, 3, 4
+    ]
+    return SlimMobileNet(model_name='large', scale=1.0, token=arch_token)
+
+
+def SlimMobileNet_v5(token=None):
+    """Return the predefined SlimMobileNet v5 ('large', scale 1.0).
+
+    ``token`` is ignored: the architecture is fixed by the 45-entry list
+    below, which ``SlimMobileNet`` splits into 20 kernel sizes, 20 expansion
+    ratios and 5 stage depths. The parameter is kept for backward
+    compatibility and now defaults to ``None`` so callers need not pass a
+    dummy value.
+    """
+    arch_token = [
+        7, 7, 3, 5, 7, 3, 5, 3, 7, 5, 3, 3, 5, 3, 7, 5, 7, 7, 5, 3, 3, 3, 6, 3,
+        4, 6, 3, 6, 6, 3, 6, 4, 6, 6, 4, 3, 6, 6, 6, 6, 4, 4, 4, 4, 4
+    ]
+    return SlimMobileNet(model_name='large', scale=1.0, token=arch_token)
+
+
+if __name__ == "__main__":
+    pass
--- a/paddleslim/models/pd/slimfacenet.py
+++ b/paddleslim/models/pd/slimfacenet.py
+# ================================================================
+#   Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import datetime
+import numpy as np
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.initializer import MSRA
+from paddle.fluid.param_attr import ParamAttr
+
+
+class SlimFaceNet():
+    def __init__(self, class_dim, scale=0.6, arch=None):
+        """Configure the network for a class count, width scale and arch.
+
+        Args:
+            class_dim: Number of classes passed to ``arc_margin_product``
+                when ``net`` is built with feature extraction turned off.
+            scale: Channel-width preset; must be one of 1.0, 0.9, 0.75, 0.6.
+            arch: Sequence of indices into ``self.table``, one per residual
+                unit, each selecting a (kernel, expansion, SE) combination.
+
+        Raises:
+            AssertionError: If ``arch`` is ``None``.
+            ValueError: If ``scale`` is not a supported preset. Earlier
+                versions printed a message and called ``exit()``, which
+                terminated the host process from library code.
+        """
+        assert arch is not None
+        self.arch = arch
+        self.class_dim = class_dim
+        kernels = [3]
+        expansions = [2, 4, 6]
+        SE = [0, 1]
+        # Every (kernel, expansion, use_se) combination, in nested-loop order.
+        self.table = [(k, e, se) for k in kernels for e in expansions
+                      for se in SE]
+
+        # scale -> (channels of the first stage, channels of later stages)
+        channels_by_scale = {
+            1.0: (64, 128),  # 100% - channel
+            0.9: (56, 116),  # 90% - channel
+            0.75: (48, 96),  # 75% - channel
+            0.6: (40, 76),  # 60% - channel
+        }
+        if scale not in channels_by_scale:
+            raise ValueError('WRONG scale: {!r}; supported scales are {}'.format(
+                scale, sorted(channels_by_scale)))
+        first_c, rest_c = channels_by_scale[scale]
+        self.Slimfacenet_bottleneck_setting = [
+            # t, c , n ,s
+            [2, first_c, 5, 2],
+            [4, rest_c, 1, 2],
+            [2, rest_c, 6, 1],
+            [4, rest_c, 1, 2],
+            [2, rest_c, 2, 1]
+        ]
+        self.extract_feature = True
+
+    def set_extract_feature_flag(self, flag):
+        """Store ``flag`` as ``self.extract_feature``.
+
+        ``net`` returns the embedding when the flag is true and
+        ``(loss, acc)`` otherwise.
+        """
+        self.extract_feature = flag
+
+    # Builds the network on `input`. Returns the 128-channel embedding when
+    # self.extract_feature is true; otherwise returns (loss, acc) computed
+    # from `label` via arc_margin_product. `label` is therefore needed in the
+    # second mode even though it defaults to None.
+    def net(self, input, label=None):
+        # Stem: 3x3 stride-2 conv, then a 3x3 depthwise conv (64 groups).
+        x = self.conv_bn_layer(
+            input,
+            filter_size=3,
+            num_filters=64,
+            stride=2,
+            padding=1,
+            num_groups=1,
+            if_act=True,
+            name='conv3x3')
+        x = self.conv_bn_layer(
+            x,
+            filter_size=3,
+            num_filters=64,
+            stride=1,
+            padding=1,
+            num_groups=64,
+            if_act=True,
+            name='dw_conv3x3')
+
+        in_c = 64
+        cnt = 0
+        # The expansion value stored in the setting table (_exp) is not used;
+        # kernel size, expansion and SE flag come from self.table, indexed by
+        # the cnt-th entry of self.arch.
+        for _exp, out_c, times, _stride in self.Slimfacenet_bottleneck_setting:
+            for i in range(times):
+                # Only the first unit of a stage uses the configured stride.
+                stride = _stride if i == 0 else 1
+                filter_size, exp, se = self.table[self.arch[cnt]]
+                se = False if se == 0 else True
+                x = self.residual_unit(
+                    x,
+                    num_in_filter=in_c,
+                    num_out_filter=out_c,
+                    stride=stride,
+                    filter_size=filter_size,
+                    expansion_factor=exp,
+                    use_se=se,
+                    name='residual_unit' + str(cnt + 1))
+                cnt += 1
+                in_c = out_c
+
+        out_c = 512
+        x = self.conv_bn_layer(
+            x,
+            filter_size=1,
+            num_filters=out_c,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            if_act=True,
+            name='conv1x1')
+        # Depthwise conv with a (7, 6) kernel and no padding.
+        # NOTE(review): presumably sized to cover the whole remaining feature
+        # map so the output is 1x1 (the reshape below relies on that);
+        # confirm against the expected input resolution.
+        x = self.conv_bn_layer(
+            x,
+            filter_size=(7, 6),
+            num_filters=out_c,
+            stride=1,
+            padding=0,
+            num_groups=out_c,
+            if_act=False,
+            name='global_dw_conv7x7')
+        # Linear 1x1 conv to 128 channels followed by batch norm.
+        x = fluid.layers.conv2d(
+            x,
+            num_filters=128,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=None,
+            use_cudnn=True,
+            param_attr=ParamAttr(
+                name='linear_conv1x1_weights',
+                initializer=MSRA(),
+                regularizer=fluid.regularizer.L2Decay(4e-4)),
+            bias_attr=False)
+        bn_name = 'linear_conv1x1_bn'
+        x = fluid.layers.batch_norm(
+            x,
+            param_attr=ParamAttr(name=bn_name + "_scale"),
+            bias_attr=ParamAttr(name=bn_name + "_offset"),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+        # Keep only the first two dimensions of x.
+        x = fluid.layers.reshape(x, shape=[x.shape[0], x.shape[1]])
+
+        if self.extract_feature:
+            return x
+
+        out = self.arc_margin_product(
+            x, label, self.class_dim, s=32.0, m=0.50, mode=2)
+        softmax = fluid.layers.softmax(input=out)
+        cost = fluid.layers.cross_entropy(input=softmax, label=label)
+        loss = fluid.layers.mean(x=cost)
+        # Accuracy is computed on `out`, i.e. before the softmax.
+        acc = fluid.layers.accuracy(input=out, label=label, k=1)
+        return loss, acc
+
+    def residual_unit(self,
+                      input,
+                      num_in_filter,
+                      num_out_filter,
+                      stride,
+                      filter_size,
+                      expansion_factor,
+                      use_se=False,
+                      name=None):
+        """Build one unit: 1x1 expand, depthwise conv, optional SE, 1x1 linear.
+
+        The hidden width is ``round(num_in_filter * expansion_factor)``. The
+        stages are named ``<name>_expand``, ``<name>_depthwise``,
+        ``<name>_se`` and ``<name>_linear``. The unit's input is added to the
+        result only when the channel count is unchanged and ``stride`` is 1.
+        """
+        hidden_channels = int(round(num_in_filter * expansion_factor))
+
+        out = self.conv_bn_layer(
+            input=input,
+            filter_size=1,
+            num_filters=hidden_channels,
+            stride=1,
+            padding=0,
+            if_act=True,
+            name=name + '_expand')
+
+        # One group per channel, i.e. a depthwise convolution.
+        out = self.conv_bn_layer(
+            input=out,
+            filter_size=filter_size,
+            num_filters=hidden_channels,
+            stride=stride,
+            padding=int((filter_size - 1) // 2),
+            if_act=True,
+            num_groups=hidden_channels,
+            use_cudnn=True,
+            name=name + '_depthwise')
+
+        if use_se:
+            out = self.se_block(
+                input=out, num_out_filter=hidden_channels, name=name + '_se')
+
+        out = self.conv_bn_layer(
+            input=out,
+            filter_size=1,
+            num_filters=num_out_filter,
+            stride=1,
+            padding=0,
+            if_act=False,
+            name=name + '_linear')
+
+        keeps_shape = num_in_filter == num_out_filter and stride == 1
+        if keeps_shape:
+            return fluid.layers.elementwise_add(x=input, y=out, act=None)
+        return out
+
+    def se_block(self, input, num_out_filter, ratio=4, name=None):
+        """Gate ``input`` channel-wise with a small bottleneck.
+
+        Steps: global average pool, 1x1 conv down to
+        ``num_out_filter // ratio`` channels, channel-wise PReLU, 1x1 conv
+        back to ``num_out_filter`` channels with ``hard_sigmoid``, then an
+        element-wise multiply with ``input`` (``axis=0``).
+        """
+        squeezed_channels = int(num_out_filter // ratio)
+        pooled = fluid.layers.pool2d(
+            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
+
+        reduced = fluid.layers.conv2d(
+            input=pooled,
+            filter_size=1,
+            num_filters=squeezed_channels,
+            act=None,
+            param_attr=ParamAttr(name=name + '_1_weights'),
+            bias_attr=ParamAttr(name=name + '_1_offset'))
+        # The PReLU slope gets an L2 regularizer with coefficient 0.0.
+        prelu_attr = ParamAttr(
+            name=name + '_prelu', regularizer=fluid.regularizer.L2Decay(0.0))
+        reduced = fluid.layers.prelu(
+            reduced, mode='channel', param_attr=prelu_attr)
+
+        gate = fluid.layers.conv2d(
+            input=reduced,
+            filter_size=1,
+            num_filters=num_out_filter,
+            act='hard_sigmoid',
+            param_attr=ParamAttr(name=name + '_2_weights'),
+            bias_attr=ParamAttr(name=name + '_2_offset'))
+        return fluid.layers.elementwise_mul(x=input, y=gate, axis=0)
+
+    def conv_bn_layer(self,
+                      input,
+                      filter_size,
+                      num_filters,
+                      stride,
+                      padding,
+                      num_groups=1,
+                      if_act=True,
+                      name=None,
+                      use_cudnn=True):
+        conv = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            act=None,
+            use_cudnn=use_cudnn,
+            param_attr=ParamAttr(
+                name=name + '_weights', initializer=MSRA()),
+            bias_attr=False)
+        bn_name = name + '_bn'
+        bn = fluid.layers.batch_norm(
+            input=conv,
+            param_attr=ParamAttr(name=bn_name + "_scale"),
+            bias_attr=ParamAttr(name=bn_name + "_offset"),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+        if if_act:
+            return fluid.layers.prelu(
+                bn,
+                mode='channel',
+                param_attr=ParamAttr(
+                    name=name + '_prelu',
+                    regularizer=fluid.regularizer.L2Decay(0.0)))
+        else:
+            return bn
+
+    def arc_margin_product(self, input, label, out_dim, s=32.0, m=0.50,
+                           mode=2):
+        input_norm = fluid.layers.sqrt(
+            fluid.layers.reduce_sum(
+                fluid.layers.square(input), dim=1))
+        input = fluid.layers.elementwise_div(input, input_norm, axis=0)
+
+        weight = fluid.layers.create_parameter(
+            shape=[out_dim, input.shape[1]],
+            dtype='float32',
+            name='weight_norm',
+            attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Xavier(),
+                regularizer=fluid.regularizer.L2Decay(4e-4)))
+
+        weight_norm = fluid.layers.sqrt(
+            fluid.layers.reduce_sum(
+                fluid.layers.square(weight), dim=1))
+        weight = fluid.layers.elementwise_div(weight, weight_norm, axis=0)
+        weight = fluid.layers.transpose(weight, perm=[1, 0])
+        cosine = fluid.layers.mul(input, weight)
+        sine = fluid.layers.sqrt(1.0 - fluid.layers.square(cosine))
+
+        cos_m = math.cos(m)
+        sin_m = math.sin(m)
+        phi = cosine * cos_m - sine * sin_m
+
+        th = math.cos(math.pi - m)
+        mm = math.sin(math.pi - m) * m
+
+        if mode == 1:
+            phi = self.paddle_where_more_than(cosine, 0, phi, cosine)
+        elif mode == 2:
+            phi = self.paddle_where_more_than(cosine, th, phi, cosine - mm)
+        else:
+            pass
+
+        one_hot = fluid.one_hot(input=label, depth=out_dim)
+        output = fluid.layers.elementwise_mul(
+            one_hot, phi) + fluid.layers.elementwise_mul(
+                (1.0 - one_hot), cosine)
+        output = output * s
+        return output
+
+    def paddle_where_more_than(self, target, limit, x, y):
+        mask = fluid.layers.cast(x=(target > limit), dtype='float32')
+        output = fluid.layers.elementwise_mul(
+            mask, x) + fluid.layers.elementwise_mul((1.0 - mask), y)
+        return output
+
+
+def SlimFaceNet_A_x0_60(class_dim=None, scale=0.6, arch=None):
+    scale = 0.6
+    arch = [0, 1, 5, 1, 0, 2, 1, 2, 0, 1, 2, 1, 1, 0, 1]
+    return SlimFaceNet(class_dim=class_dim, scale=scale, arch=arch)
+
+
+def SlimFaceNet_B_x0_75(class_dim=None, scale=0.6, arch=None):
+    scale = 0.75
+    arch = [1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 3, 2, 2, 3]
+    return SlimFaceNet(class_dim=class_dim, scale=scale, arch=arch)
+
+
+def SlimFaceNet_C_x0_75(class_dim=None, scale=0.6, arch=None):
+    scale = 0.75
+    arch = [1, 3, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 5, 5, 5]
+    return SlimFaceNet(class_dim=class_dim, scale=scale, arch=arch)
+
+
+if __name__ == "__main__":
+    x = fluid.data(name='x', shape=[-1, 3, 112, 112], dtype='float32')
+    print(x.shape)
+    model = SlimFaceNet(10000, [1, 3, 3, 1, 1, 0, 0, 1, 0, 1, 1, 0, 5, 5, 3])
+    y = model.net(x)
--- a/paddleslim/models/pd/util.py
+++ b/paddleslim/models/pd/util.py
+from __future__ import absolute_import
+import paddle.fluid as fluid
+from ..models import classification_models
+
+# Only image_classification is exported by ``from ... import *``.
+__all__ = ["image_classification"]
+
+# Model names that image_classification accepts for its ``model`` argument.
+model_list = classification_models.model_list
+
+
+def image_classification(model, image_shape, class_num, use_gpu=False):
+    """Build train and test programs for a named classification model.
+
+    Args:
+        model: Name of the model; must be an entry of ``model_list``. It is
+            looked up in ``classification_models`` and instantiated with no
+            arguments.
+        image_shape: Shape passed to the ``image`` data layer.
+        class_num: Passed to the model's ``net`` as ``class_dim``.
+        use_gpu: When true, the executor uses ``fluid.CUDAPlace(0)``;
+            otherwise ``fluid.CPUPlace()``.
+
+    Returns:
+        A 5-tuple ``(exe, train_program, val_program, (image, label),
+        (acc_top1.name, acc_top5.name, avg_cost.name))``. The startup program
+        has already been run on ``exe``.
+
+    Raises:
+        AssertionError: If ``model`` is not in ``model_list`` (the check is
+            skipped when Python runs with ``-O``).
+    """
+    assert model in model_list
+    train_program = fluid.Program()
+    startup_program = fluid.Program()
+    # Inside the guard, the default main/startup programs are the two above.
+    with fluid.program_guard(train_program, startup_program):
+        image = fluid.layers.data(
+            name='image', shape=image_shape, dtype='float32')
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        # From here on ``model`` is the model instance, not its name.
+        model = classification_models.__dict__[model]()
+        out = model.net(input=image, class_dim=class_num)
+        cost = fluid.layers.cross_entropy(input=out, label=label)
+        avg_cost = fluid.layers.mean(x=cost)
+        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
+        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
+        # Clone for evaluation before minimize() so the test program holds
+        # only the ops built so far, without the optimizer's additions.
+        val_program = fluid.default_main_program().clone(for_test=True)
+
+        # Momentum optimizer built from the positional arguments 0.1 and 0.9.
+        opt = fluid.optimizer.Momentum(0.1, 0.9)
+        opt.minimize(avg_cost)
+        place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        # Still inside the guard, so this runs ``startup_program``.
+        exe.run(fluid.default_startup_program())
+    return exe, train_program, val_program, (image, label), (
+        acc_top1.name, acc_top5.name, avg_cost.name)