From 556d9167d20c2de4b4f659f2646af12bdfd2e8bb Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Fri, 19 Jun 2020 07:55:30 +0000 Subject: [PATCH] Move models of paddle to models/pd --- .../dml_mobilenetv1_pytorch_quickstart.ipynb | 270 ------------- .../DML/dml_mobilenetv1_pytorch_quickstart.md | 155 ++++++++ paddleslim/models/pd/__init__.py | 19 + paddleslim/models/pd/classification_models.py | 6 + paddleslim/models/pd/mobilenet.py | 197 +++++++++ paddleslim/models/pd/mobilenet_v2.py | 259 ++++++++++++ paddleslim/models/pd/resnet.py | 229 +++++++++++ paddleslim/models/pd/slim_mobilenet.py | 322 +++++++++++++++ paddleslim/models/pd/slimfacenet.py | 373 ++++++++++++++++++ paddleslim/models/pd/util.py | 32 ++ 10 files changed, 1592 insertions(+), 270 deletions(-) delete mode 100644 demo/DML/dml_mobilenetv1_pytorch_quickstart.ipynb create mode 100644 demo/DML/dml_mobilenetv1_pytorch_quickstart.md create mode 100644 paddleslim/models/pd/__init__.py create mode 100644 paddleslim/models/pd/classification_models.py create mode 100644 paddleslim/models/pd/mobilenet.py create mode 100644 paddleslim/models/pd/mobilenet_v2.py create mode 100644 paddleslim/models/pd/resnet.py create mode 100644 paddleslim/models/pd/slim_mobilenet.py create mode 100644 paddleslim/models/pd/slimfacenet.py create mode 100644 paddleslim/models/pd/util.py diff --git a/demo/DML/dml_mobilenetv1_pytorch_quickstart.ipynb b/demo/DML/dml_mobilenetv1_pytorch_quickstart.ipynb deleted file mode 100644 index 88e8dd89..00000000 --- a/demo/DML/dml_mobilenetv1_pytorch_quickstart.ipynb +++ /dev/null @@ -1,270 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. 安装依赖\n", - "\n", - "### 1.1 安装PaddleSlim\n", - "\n", - "```\n", - "git clone https://github.com/PaddlePaddle/PaddleSlim.git\n", - "cd PaddleSlim\n", - "python setup.py install\n", - "```\n", - "\n", - "### 1.2 安装pytorch\n", - "\n", - "```\n", - "pip install torch torchvision\n", - "```\n", - "\n", - "## 2. 
Import依赖与环境设置" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from __future__ import print_function\n", - "import argparse\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim\n", - "from torchvision import datasets, transforms, models\n", - "from torch.optim.lr_scheduler import StepLR\n", - "from paddleslim.dist import DML\n", - "\n", - "args = {\"batch-size\": 256,\n", - " \"test-batch-size\": 256,\n", - " \"epochs\": 10,\n", - " \"lr\": 1.0,\n", - " \"gamma\": 0.7,\n", - " \"seed\": 1,\n", - " \"log-interval\": 10}\n", - "\n", - "\n", - "\n", - "use_cuda = torch.cuda.is_available()\n", - "torch.manual_seed(args[\"seed\"])\n", - "device = torch.device(\"cuda\" if use_cuda else \"cpu\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. 准备数据\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "FloatProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html", - "output_type": "error", - "traceback": [ - "\u001b[0;31m\u001b[0m", - "\u001b[0;31mImportError\u001b[0mTraceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m transform=transforms.Compose([\n\u001b[1;32m 7\u001b[0m \u001b[0mtransforms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mToTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mtransforms\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNormalize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m ])),\n\u001b[1;32m 10\u001b[0m batch_size=args[\"batch_size\"], shuffle=True, **kwargs)\n", - "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/torchvision/datasets/cifar.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, root, train, transform, target_transform, download)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdownload\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdownload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_integrity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/torchvision/datasets/cifar.pyc\u001b[0m in \u001b[0;36mdownload\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Files already downloaded and verified'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 148\u001b[0;31m \u001b[0mdownload_and_extract_archive\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mroot\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmd5\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtgz_md5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 149\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextra_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/torchvision/datasets/utils.pyc\u001b[0m in \u001b[0;36mdownload_and_extract_archive\u001b[0;34m(url, download_root, extract_root, filename, md5, remove_finished)\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbasename\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
263\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 264\u001b[0;31m \u001b[0mdownload_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdownload_root\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmd5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0marchive\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdownload_root\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/torchvision/datasets/utils.pyc\u001b[0m in \u001b[0;36mdownload_url\u001b[0;34m(url, root, filename, md5)\u001b[0m\n\u001b[1;32m 83\u001b[0m urllib.request.urlretrieve(\n\u001b[1;32m 84\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 85\u001b[0;31m \u001b[0mreporthook\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgen_bar_updater\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 86\u001b[0m )\n\u001b[1;32m 87\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0murllib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mURLError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIOError\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/torchvision/datasets/utils.pyc\u001b[0m in \u001b[0;36mgen_bar_updater\u001b[0;34m()\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0mgen_bar_updater\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mpbar\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtotal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mbar_update\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mblock_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtotal_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/tqdm/notebook.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 207\u001b[0m \u001b[0mtotal\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotal\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0munit_scale\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotal\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotal\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 208\u001b[0m self.container = self.status_printer(\n\u001b[0;32m--> 209\u001b[0;31m self.fp, total, self.desc, self.ncols)\n\u001b[0m\u001b[1;32m 210\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisplay\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/root/envs/paddle_1.8/lib/python2.7/site-packages/tqdm/notebook.pyc\u001b[0m in \u001b[0;36mstatus_printer\u001b[0;34m(_, total, desc, ncols)\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0;31m# #187 #451 
#558\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m raise ImportError(\n\u001b[0;32m--> 104\u001b[0;31m \u001b[0;34m\"FloatProgress not found. Please update jupyter and ipywidgets.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 105\u001b[0m \u001b[0;34m\" See https://ipywidgets.readthedocs.io/en/stable\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 106\u001b[0m \"/user_install.html\")\n", - "\u001b[0;31mImportError\u001b[0m: FloatProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html" - ] - } - ], - "source": [ - "\n", - "\n", - "kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}\n", - "train_loader = torch.utils.data.DataLoader(\n", - " datasets.CIFAR10('../data', train=True, download=True,\n", - " transform=transforms.Compose([\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n", - " ])),\n", - " batch_size=args[\"batch_size\"], shuffle=True, **kwargs)\n", - "test_loader = torch.utils.data.DataLoader(\n", - " datasets.CIFAR10('../data', train=False, transform=transforms.Compose([\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n", - " ])),\n", - " batch_size=args[\"test_batch_size\"], shuffle=True, **kwargs)\n", - "\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. 定义模型" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = models.mobilenet_v2(num_classes=10).to(device)\n", - "optimizer = optim.Adadelta(model.parameters(), lr=args.lr)\n", - "scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. 
添加DML修饰\n", - "### 5.1 将模型转为DML模型" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = DML(model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5.2 将优化器转为DML优化器" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = model.opt(optimizer)\n", - "scheduler = model.lr(scheduler)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. 定义训练方法\n", - "\n", - "将原来的交叉熵损失替换为DML损失,代码如下:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def train(args, model, device, train_loader, optimizer, epoch):\n", - " model.train()\n", - " for batch_idx, (data, target) in enumerate(train_loader):\n", - " data, target = data.to(device), target.to(device)\n", - " optimizer.zero_grad()\n", - " output = model(data)\n", - " loss = model.dml_loss(output, target) \n", - "# output = F.softmax(output, dim=1)\n", - "# loss = F.cross_entropy(output, target)\n", - "# loss.backward()\n", - " optimizer.step()\n", - " if batch_idx % args[\"log_interval\"] == 0:\n", - " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", - " epoch, batch_idx * len(data), len(train_loader.dataset),\n", - " 100. * batch_idx / len(train_loader), loss.item()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 7. 
定义测试方法" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "def test(model, device, test_loader):\n", - " model.eval()\n", - " test_loss = 0\n", - " correct = 0\n", - " with torch.no_grad():\n", - " for data, target in test_loader:\n", - " data, target = data.to(device), target.to(device)\n", - " output = model(data)\n", - " output = F.softmax(output, dim=1)\n", - " loss = F.cross_entropy(output, target, reduction=\"sum\")\n", - " test_loss += loss\n", - " pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability\n", - " correct += pred.eq(target.view_as(pred)).sum().item()\n", - "\n", - " test_loss /= len(test_loader.dataset)\n", - "\n", - " print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n", - " test_loss, correct, len(test_loader.dataset),\n", - " 100. * correct / len(test_loader.dataset))) " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 8. 
开始训练" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "epochs = 10\n", - "for epoch in range(1, epochs + 1):\n", - " train(args, model, device, train_loader, optimizer, epoch)\n", - " test(model, device, test_loader)\n", - " scheduler.step()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/demo/DML/dml_mobilenetv1_pytorch_quickstart.md b/demo/DML/dml_mobilenetv1_pytorch_quickstart.md new file mode 100644 index 00000000..f4ca7acc --- /dev/null +++ b/demo/DML/dml_mobilenetv1_pytorch_quickstart.md @@ -0,0 +1,155 @@ +## 1. 安装依赖 + +### 1.1 安装PaddleSlim + +``` +git clone https://github.com/PaddlePaddle/PaddleSlim.git +cd PaddleSlim +python setup.py install +``` + +### 1.2 安装pytorch + +``` +pip install torch torchvision +``` + +## 2. Import依赖与环境设置 + + +```python +from __future__ import print_function +import argparse +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torchvision import datasets, transforms, models +from torch.optim.lr_scheduler import StepLR +from paddleslim.dist import DML + +args = {"batch-size": 256, + "test-batch-size": 256, + "epochs": 10, + "lr": 1.0, + "gamma": 0.7, + "seed": 1, + "log-interval": 10} + + + +use_cuda = torch.cuda.is_available() +torch.manual_seed(args["seed"]) +device = torch.device("cuda" if use_cuda else "cpu") +``` + +## 3. 
准备数据 + + + +```python + + +kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} +train_loader = torch.utils.data.DataLoader( + datasets.CIFAR10('../data', train=True, download=True, + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + ])), + batch_size=args["batch_size"], shuffle=True, **kwargs) +test_loader = torch.utils.data.DataLoader( + datasets.CIFAR10('../data', train=False, transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + ])), + batch_size=args["test_batch_size"], shuffle=True, **kwargs) + + +``` + + +## 4. 定义模型 + + +```python +model = models.mobilenet_v2(num_classes=10).to(device) +optimizer = optim.Adadelta(model.parameters(), lr=args.lr) +scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma) +``` + +## 5. 添加DML修饰 +### 5.1 将模型转为DML模型 + + +```python +model = DML(model) +``` + +### 5.2 将优化器转为DML优化器 + + +```python +optimizer = model.opt(optimizer) +scheduler = model.lr(scheduler) +``` + +### 6. 定义训练方法 + +将原来的交叉熵损失替换为DML损失,代码如下: + + +```python +def train(args, model, device, train_loader, optimizer, epoch): + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = model.dml_loss(output, target) +# output = F.softmax(output, dim=1) +# loss = F.cross_entropy(output, target) +# loss.backward() + optimizer.step() + if batch_idx % args["log_interval"] == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_loader.dataset), + 100. * batch_idx / len(train_loader), loss.item())) +``` + +## 7. 
定义测试方法 + + +```python + +def test(model, device, test_loader): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + output = F.softmax(output, dim=1) + loss = F.cross_entropy(output, target, reduction="sum") + test_loss += loss + pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability + correct += pred.eq(target.view_as(pred)).sum().item() + + test_loss /= len(test_loader.dataset) + + print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( + test_loss, correct, len(test_loader.dataset), + 100. * correct / len(test_loader.dataset))) +``` + +## 8. 开始训练 + + +```python +epochs = 10 +for epoch in range(1, epochs + 1): + train(args, model, device, train_loader, optimizer, epoch) + test(model, device, test_loader) + scheduler.step() +``` diff --git a/paddleslim/models/pd/__init__.py b/paddleslim/models/pd/__init__.py new file mode 100644 index 00000000..14ea9f3d --- /dev/null +++ b/paddleslim/models/pd/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from .util import image_classification +from .slimfacenet import SlimFaceNet_A_x0_60, SlimFaceNet_B_x0_75, SlimFaceNet_C_x0_75 +from .slim_mobilenet import SlimMobileNet_v1, SlimMobileNet_v2, SlimMobileNet_v3, SlimMobileNet_v4, SlimMobileNet_v5 +__all__ = ["image_classification"] diff --git a/paddleslim/models/pd/classification_models.py b/paddleslim/models/pd/classification_models.py new file mode 100644 index 00000000..a5e605cc --- /dev/null +++ b/paddleslim/models/pd/classification_models.py @@ -0,0 +1,6 @@ +from __future__ import absolute_import +from .mobilenet import MobileNet +from .resnet import ResNet34, ResNet50 +from .mobilenet_v2 import MobileNetV2 +__all__ = ["model_list", "MobileNet", "ResNet34", "ResNet50", "MobileNetV2"] +model_list = ['MobileNet', 'ResNet34', 'ResNet50', 'MobileNetV2'] diff --git a/paddleslim/models/pd/mobilenet.py b/paddleslim/models/pd/mobilenet.py new file mode 100644 index 00000000..921d6226 --- /dev/null +++ b/paddleslim/models/pd/mobilenet.py @@ -0,0 +1,197 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle.fluid as fluid +from paddle.fluid.initializer import MSRA +from paddle.fluid.param_attr import ParamAttr + +__all__ = ['MobileNet'] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [10, 16, 30], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class MobileNet(): + def __init__(self): + self.params = train_parameters + + def net(self, input, class_dim=1000, scale=1.0): + # conv1: 112x112 + input = self.conv_bn_layer( + input, + filter_size=3, + channels=3, + num_filters=int(32 * scale), + stride=2, + padding=1, + name="conv1") + + # 56x56 + input = self.depthwise_separable( + input, + num_filters1=32, + num_filters2=64, + num_groups=32, 
+ stride=1, + scale=scale, + name="conv2_1") + + input = self.depthwise_separable( + input, + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=2, + scale=scale, + name="conv2_2") + + # 28x28 + input = self.depthwise_separable( + input, + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=scale, + name="conv3_1") + + input = self.depthwise_separable( + input, + num_filters1=128, + num_filters2=256, + num_groups=128, + stride=2, + scale=scale, + name="conv3_2") + + # 14x14 + input = self.depthwise_separable( + input, + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=scale, + name="conv4_1") + + input = self.depthwise_separable( + input, + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=2, + scale=scale, + name="conv4_2") + + # 14x14 + for i in range(5): + input = self.depthwise_separable( + input, + num_filters1=512, + num_filters2=512, + num_groups=512, + stride=1, + scale=scale, + name="conv5" + "_" + str(i + 1)) + # 7x7 + input = self.depthwise_separable( + input, + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=2, + scale=scale, + name="conv5_6") + + input = self.depthwise_separable( + input, + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=1, + scale=scale, + name="conv6") + + input = fluid.layers.pool2d( + input=input, + pool_size=0, + pool_stride=1, + pool_type='avg', + global_pooling=True) + + output = fluid.layers.fc(input=input, + size=class_dim, + act='softmax', + param_attr=ParamAttr( + initializer=MSRA(), name="fc7_weights"), + bias_attr=ParamAttr(name="fc7_offset")) + + return output + + def conv_bn_layer(self, + input, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + act='relu', + use_cudnn=True, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + 
use_cudnn=use_cudnn, + param_attr=ParamAttr( + initializer=MSRA(), name=name + "_weights"), + bias_attr=False) + bn_name = name + "_bn" + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def depthwise_separable(self, + input, + num_filters1, + num_filters2, + num_groups, + stride, + scale, + name=None): + depthwise_conv = self.conv_bn_layer( + input=input, + filter_size=3, + num_filters=int(num_filters1 * scale), + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + use_cudnn=False, + name=name + "_dw") + + pointwise_conv = self.conv_bn_layer( + input=depthwise_conv, + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0, + name=name + "_sep") + return pointwise_conv diff --git a/paddleslim/models/pd/mobilenet_v2.py b/paddleslim/models/pd/mobilenet_v2.py new file mode 100644 index 00000000..ccfb250b --- /dev/null +++ b/paddleslim/models/pd/mobilenet_v2.py @@ -0,0 +1,259 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle.fluid as fluid +from paddle.fluid.initializer import MSRA +from paddle.fluid.param_attr import ParamAttr + +__all__ = [ + 'MobileNetV2', 'MobileNetV2_x0_25, ' + 'MobileNetV2_x0_5', 'MobileNetV2_x1_0', 'MobileNetV2_x1_5', + 'MobileNetV2_x2_0', 'MobileNetV2_scale' +] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class MobileNetV2(): + def __init__(self, scale=1.0, change_depth=False): + self.params = train_parameters + self.scale = scale + self.change_depth = change_depth + + def net(self, input, class_dim=1000): + scale = self.scale + change_depth = self.change_depth + #if change_depth is True, the new depth is 1.4 times as deep as before. 
        # Tail of MobileNetV2.net(). Each tuple below is
        # (expansion factor t, output channels c, repeat count n, stride s).
        # `change_depth=True` selects a deeper variant (more repeats in the
        # middle stages) used when scaling the network up.
        bottleneck_params_list = [
            (1, 16, 1, 1),
            (6, 24, 2, 2),
            (6, 32, 3, 2),
            (6, 64, 4, 2),
            (6, 96, 3, 1),
            (6, 160, 3, 2),
            (6, 320, 1, 1),
        ] if change_depth == False else [
            (1, 16, 1, 1),
            (6, 24, 2, 2),
            (6, 32, 5, 2),
            (6, 64, 7, 2),
            (6, 96, 5, 1),
            (6, 160, 3, 2),
            (6, 320, 1, 1),
        ]

        # conv1: 3x3 stride-2 stem convolution.
        input = self.conv_bn_layer(
            input,
            num_filters=int(32 * scale),
            filter_size=3,
            stride=2,
            padding=1,
            if_act=True,
            name='conv1_1')

        # Bottleneck (inverted-residual) stages; parameter-name prefixes
        # run 'conv2' .. 'conv8'.
        i = 1
        in_c = int(32 * scale)
        for layer_setting in bottleneck_params_list:
            t, c, n, s = layer_setting
            i += 1
            input = self.invresi_blocks(
                input=input,
                in_c=in_c,
                t=t,
                c=int(c * scale),
                n=n,
                s=s,
                name='conv' + str(i))
            in_c = int(c * scale)
        # last_conv: 1x1 expansion to the classifier width. The width
        # multiplier is applied only above 1.0; smaller scales keep the
        # full 1280 channels (standard MobileNetV2 behavior).
        input = self.conv_bn_layer(
            input=input,
            num_filters=int(1280 * scale) if scale > 1.0 else 1280,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=True,
            name='conv9')

        # Global average pooling down to 1x1 before the classifier.
        input = fluid.layers.pool2d(
            input=input,
            pool_size=7,
            pool_stride=1,
            pool_type='avg',
            global_pooling=True)

        output = fluid.layers.fc(input=input,
                                 size=class_dim,
                                 act='softmax',
                                 param_attr=ParamAttr(name='fc10_weights'),
                                 bias_attr=ParamAttr(name='fc10_offset'))
        return output

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      channels=None,
                      num_groups=1,
                      if_act=True,
                      name=None,
                      use_cudnn=True):
        """conv2d + batch_norm, optionally followed by ReLU6.

        Args:
            input: input feature-map variable.
            filter_size, num_filters, stride, padding: conv2d settings.
            channels: unused; kept for signature compatibility.
            num_groups: conv groups (== num_filters gives a depthwise conv).
            if_act: apply ReLU6 after batch norm when True.
            name: parameter-name prefix for the conv/BN weights.
            use_cudnn: callers disable this for depthwise convs.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        bn_name = name + '_bn'
        bn = fluid.layers.batch_norm(
            input=conv,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        if if_act:
            # MobileNetV2 uses ReLU6 as its non-linearity.
            return fluid.layers.relu6(bn)
        else:
            return bn

    def shortcut(self, input, data_residual):
        """Identity shortcut: element-wise add of unit input and output."""
        return fluid.layers.elementwise_add(input, data_residual)

    def inverted_residual_unit(self,
                               input,
                               num_in_filter,
                               num_filters,
                               ifshortcut,
                               stride,
                               filter_size,
                               padding,
                               expansion_factor,
                               name=None):
        """One MobileNetV2 inverted-residual unit.

        Pipeline: 1x1 expand -> depthwise filter_size conv -> linear 1x1
        projection, with an identity shortcut when `ifshortcut` is True
        (callers only set it when input/output shapes match).
        """
        num_expfilter = int(round(num_in_filter * expansion_factor))

        channel_expand = self.conv_bn_layer(
            input=input,
            num_filters=num_expfilter,
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            name=name + '_expand')

        # Depthwise conv: groups == channels. cuDNN is disabled here —
        # presumably slower than the native kernel for depthwise convs in
        # this fluid version.
        bottleneck_conv = self.conv_bn_layer(
            input=channel_expand,
            num_filters=num_expfilter,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            num_groups=num_expfilter,
            if_act=True,
            name=name + '_dwise',
            use_cudnn=False)

        # Linear projection: no activation after the last BN.
        linear_out = self.conv_bn_layer(
            input=bottleneck_conv,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            if_act=False,
            name=name + '_linear')
        if ifshortcut:
            out = self.shortcut(input=input, data_residual=linear_out)
            return out
        else:
            return linear_out

    def invresi_blocks(self, input, in_c, t, c, n, s, name=None):
        """A stage of `n` inverted-residual units.

        Only the first unit may change stride/channel count; the remaining
        units keep stride 1 and use the identity shortcut.
        """
        first_block = self.inverted_residual_unit(
            input=input,
            num_in_filter=in_c,
            num_filters=c,
            ifshortcut=False,
            stride=s,
            filter_size=3,
            padding=1,
            expansion_factor=t,
            name=name + '_1')

        last_residual_block = first_block
        last_c = c

        for i in range(1, n):
            last_residual_block = self.inverted_residual_unit(
                input=last_residual_block,
                num_in_filter=last_c,
                num_filters=c,
                ifshortcut=True,
                stride=1,
                filter_size=3,
                padding=1,
                expansion_factor=t,
                name=name + '_' + str(i + 1))
        return last_residual_block


def MobileNetV2_x0_25():
    # MobileNetV2 with a 0.25x width multiplier.
    model = MobileNetV2(scale=0.25)
    return model


def MobileNetV2_x0_5():
    # MobileNetV2 with a 0.5x width multiplier.
    model = MobileNetV2(scale=0.5)
    return model


def MobileNetV2_x1_0():
    # Baseline MobileNetV2 (1.0x width).
    model = MobileNetV2(scale=1.0)
    return model


def MobileNetV2_x1_5():
    # MobileNetV2 with a 1.5x width multiplier.
    model = MobileNetV2(scale=1.5)
    return model
def MobileNetV2_x2_0():
    """MobileNetV2 with a 2.0x width multiplier."""
    model = MobileNetV2(scale=2.0)
    return model


def MobileNetV2_scale():
    """Deeper MobileNetV2 variant: 1.2x width with extended stage depths."""
    model = MobileNetV2(scale=1.2, change_depth=True)
    return model


# --- paddleslim/models/pd/resnet.py ---
import math

__all__ = ["ResNet", "ResNet34", "ResNet50", "ResNet101", "ResNet152"]

# Reference training hyper-parameters shipped alongside the model
# definition (exposed as ResNet().params).
train_parameters = {
    "input_size": [3, 224, 224],
    "input_mean": [0.485, 0.456, 0.406],
    "input_std": [0.229, 0.224, 0.225],
    "learning_strategy": {
        "name": "piecewise_decay",
        "batch_size": 256,
        "epochs": [10, 16, 30],
        "steps": [0.1, 0.01, 0.001, 0.0001]
    }
}


class ResNet():
    """ResNet-34/50/101/152 backbone built with the fluid static-graph API.

    Args:
        layers (int): network depth; one of 34, 50, 101, 152.
        prefix_name (str): optional prefix prepended to every parameter
            name so two instances (e.g. student and teacher in
            distillation) can coexist in one program without name clashes.
    """

    def __init__(self, layers=50, prefix_name=''):
        self.params = train_parameters
        self.layers = layers
        self.prefix_name = prefix_name

    def net(self, input, class_dim=1000, conv1_name='conv1', fc_name=None):
        """Build the forward graph; returns the softmax output variable.

        Args:
            input: NCHW image variable.
            class_dim: number of classifier outputs.
            conv1_name: name of the stem conv (overridable to avoid
                name conflicts between programs).
            fc_name: optional name for the final FC layer.
        """
        layers = self.layers
        # FIX: was `self.prefix_name is ''` — identity comparison with a
        # string literal only worked via CPython interning (and raises
        # SyntaxWarning on Python >= 3.8); equality is the correct test.
        prefix_name = self.prefix_name if self.prefix_name == '' else self.prefix_name + '_'
        supported_layers = [34, 50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)

        # Residual-unit repeats per stage for each supported depth.
        if layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        num_filters = [64, 128, 256, 512]

        # TODO(wanghaoshuang@baidu.com):
        # fix name("conv1") conflict between student and teacher in distillation.
        # Stem: 7x7/2 conv followed by 3x3/2 max pool.
        conv = self.conv_bn_layer(
            input=input,
            num_filters=64,
            filter_size=7,
            stride=2,
            act='relu',
            name=prefix_name + conv1_name)
        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        if layers >= 50:
            # Bottleneck (1-3-1) units; parameter names follow the
            # Caffe-style "res<stage><branch>" convention.
            for block in range(len(depth)):
                for i in range(depth[block]):
                    if layers in [101, 152] and block == 2:
                        # Deep third stage uses numeric suffixes
                        # ("res4b1", "res4b2", ...) instead of letters.
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv_name = prefix_name + conv_name
                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=num_filters[block],
                        # Downsample at the first unit of every stage
                        # except the first.
                        stride=2 if i == 0 and block != 0 else 1,
                        name=conv_name)

            pool = fluid.layers.pool2d(
                input=conv, pool_size=7, pool_type='avg', global_pooling=True)
            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
            fc_name = fc_name if fc_name is None else prefix_name + fc_name
            out = fluid.layers.fc(input=pool,
                                  size=class_dim,
                                  act='softmax',
                                  name=fc_name,
                                  param_attr=fluid.param_attr.ParamAttr(
                                      initializer=fluid.initializer.Uniform(
                                          -stdv, stdv)))
        else:
            # Basic (3-3) units for ResNet-34.
            for block in range(len(depth)):
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv_name = prefix_name + conv_name
                    conv = self.basic_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        is_first=block == i == 0,
                        name=conv_name)

            pool = fluid.layers.pool2d(
                input=conv, pool_type='avg', global_pooling=True)
            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
            fc_name = fc_name if fc_name is None else prefix_name + fc_name
            out = fluid.layers.fc(
                input=pool,
                size=class_dim,
                act='softmax',
                name=fc_name,
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Uniform(-stdv, stdv)))

        return out

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        """conv2d (no bias) + batch_norm, with Caffe-compatible BN names."""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False,
            name=name + '.conv2d.output.1')
        # Derive the BN parameter name from the conv name: "conv1" maps to
        # "bn_conv1", "res..." maps to "bn...", with the distillation
        # prefix (if any) re-inserted in front of the "bn".
        if self.prefix_name == '':
            if name == "conv1":
                bn_name = "bn_" + name
            else:
                bn_name = "bn" + name[3:]
        else:
            if name.split("_")[1] == "conv1":
                bn_name = name.split("_", 1)[0] + "_bn_" + name.split("_",
                                                                      1)[1]
            else:
                bn_name = name.split("_", 1)[0] + "_bn" + name.split("_",
                                                                     1)[1][3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            name=bn_name + '.output.1',
            param_attr=ParamAttr(name=bn_name + '_scale'),
            # FIX: was a positional ParamAttr(...) — identical behavior
            # (name is ParamAttr's first parameter) but now consistent
            # with every other call in this file.
            bias_attr=ParamAttr(name=bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance', )

    def shortcut(self, input, ch_out, stride, is_first, name):
        """Projection shortcut when shape changes (or for the very first
        basic block); identity otherwise."""
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1 or is_first:
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride, name):
        """1x1 reduce -> 3x3 -> 1x1 expand (x4) with residual add."""
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        conv2 = self.conv_bn_layer(
            input=conv1,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None,
            name=name + "_branch2c")

        short = self.shortcut(
            input,
            num_filters * 4,
            stride,
            is_first=False,
            name=name + "_branch1")

        return fluid.layers.elementwise_add(
            x=short, y=conv2, act='relu', name=name + ".add.output.5")

    def basic_block(self, input, num_filters, stride, is_first, name):
        """Two 3x3 convs with residual add (ResNet-34 unit)."""
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b")
        short = self.shortcut(
            input, num_filters, stride, is_first, name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')


def ResNet34(prefix_name=''):
    model = ResNet(layers=34, prefix_name=prefix_name)
    return model


def ResNet50(prefix_name=''):
    model = ResNet(layers=50, prefix_name=prefix_name)
    return model


# FIX: ResNet101/ResNet152 now accept prefix_name like their siblings
# (default '' keeps the old call signature working unchanged).
def ResNet101(prefix_name=''):
    model = ResNet(layers=101, prefix_name=prefix_name)
    return model


def ResNet152(prefix_name=''):
    model = ResNet(layers=152, prefix_name=prefix_name)
    return model


# --- paddleslim/models/pd/slim_mobilenet.py ---
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr

__all__ = [
    'SlimMobileNet_v1', 'SlimMobileNet_v2', 'SlimMobileNet_v3',
    'SlimMobileNet_v4', 'SlimMobileNet_v5'
]


class SlimMobileNet():
    """Searched MobileNetV3-large variant described by a 45-element token.

    Token layout: token[0:20] are per-block depthwise kernel sizes,
    token[20:40] are per-block expansion ratios, token[40:45] are the
    number of blocks in each of the five searchable stages.
    """

    def __init__(self, scale=1.0, model_name='large', token=[]):
        # NOTE(review): mutable default argument for `token`; harmless
        # here only because the list is never mutated.
        assert len(token) >= 45
        self.kernel_size_lis = token[:20]  # depthwise kernel size per block
        self.exp_lis = token[20:40]  # expansion ratio per block
        self.depth_lis = token[40:45]  # blocks per searchable stage

        self.scale = scale
        self.inplanes = 16
        if model_name == "large":
            # MobileNetV3-large skeleton: per-stage output channels,
            # strides, squeeze-excite usage and activations.
            self.cfg_channel = [16, 24, 40, 80, 112, 160]
            self.cfg_stride = [1, 2, 2, 2, 1, 2]
            self.cfg_se = [False, False, True, False, True, True]
            self.cfg_act = [
                'relu', 'relu', 'relu', 'hard_swish', 'hard_swish',
                'hard_swish'
            ]
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

    def net(self, input, class_dim=1000):
        """Build the forward graph; returns pre-softmax logits."""
        scale = self.scale
        inplanes = self.inplanes

        kernel_size_lis = self.kernel_size_lis
        exp_lis = self.exp_lis
        depth_lis = self.depth_lis
        cfg_channel = self.cfg_channel
        cfg_stride = self.cfg_stride
        cfg_se = self.cfg_se
        cfg_act = self.cfg_act

        cls_ch_squeeze = self.cls_ch_squeeze
        cls_ch_expand = self.cls_ch_expand
        # conv1: 3x3 stride-2 stem with hard-swish.
        conv = self.conv_bn_layer(
            input,
            filter_size=3,
            num_filters=self.make_divisible(inplanes * scale),
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv1')
        inplanes = self.make_divisible(inplanes * scale)

        # conv2: first residual unit skips the 1x1 expand conv
        # (short=True) — the stem output is used directly.
        num_mid_filter = self.make_divisible(scale * inplanes)
        _num_out_filter = cfg_channel[0]
        num_out_filter = self.make_divisible(scale * _num_out_filter)
        conv = self.residual_unit(
            input=conv,
            num_in_filter=inplanes,
            num_mid_filter=num_mid_filter,
            num_out_filter=num_out_filter,
            act=cfg_act[0],
            stride=cfg_stride[0],
            filter_size=3,
            use_se=cfg_se[0],
            name='conv2',
            short=True)
        inplanes = self.make_divisible(scale * cfg_channel[0])

        i = 3
        for depth_id in range(len(depth_lis)):
            for repeat_time in range(depth_lis[depth_id]):
                # Token lists are laid out 4 slots per stage; depth_lis
                # values are therefore expected to be <= 4.
                num_mid_filter = self.make_divisible(
                    scale * _num_out_filter *
                    exp_lis[depth_id * 4 + repeat_time])
                _num_out_filter = cfg_channel[depth_id + 1]
                num_out_filter = self.make_divisible(scale * _num_out_filter)
                # Only the first block of a stage may downsample.
                stride = cfg_stride[depth_id + 1] if repeat_time == 0 else 1
                conv = self.residual_unit(
                    input=conv,
                    num_in_filter=inplanes,
                    num_mid_filter=num_mid_filter,
                    num_out_filter=num_out_filter,
                    act=cfg_act[depth_id + 1],
                    stride=stride,
                    filter_size=kernel_size_lis[depth_id * 4 + repeat_time],
                    use_se=cfg_se[depth_id + 1],
                    name='conv' + str(i))

                inplanes = self.make_divisible(scale *
                                               cfg_channel[depth_id + 1])
                i += 1

        # Head: 1x1 squeeze conv, global pool, 1x1 expand, dropout, FC.
        conv = self.conv_bn_layer(
            input=conv,
            filter_size=1,
            num_filters=self.make_divisible(scale * cls_ch_squeeze),
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv_last')
        conv = fluid.layers.pool2d(
            input=conv, pool_type='avg', global_pooling=True, use_cudnn=False)
        conv = fluid.layers.conv2d(
            input=conv,
            num_filters=cls_ch_expand,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            param_attr=ParamAttr(name='last_1x1_conv_weights'),
            bias_attr=False)
        conv = fluid.layers.hard_swish(conv)
        drop = fluid.layers.dropout(x=conv, dropout_prob=0.2)
        out = fluid.layers.fc(input=drop,
                              size=class_dim,
                              param_attr=ParamAttr(name='fc_weights'),
                              bias_attr=ParamAttr(name='fc_offset'))
        return out

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      num_groups=1,
                      if_act=True,
                      act=None,
                      name=None,
                      use_cudnn=True,
                      res_last_bn_init=False):
        """conv2d (no bias) + batch_norm, optionally followed by ReLU or
        hard-swish.

        The BN scale/offset use zero L2 decay (standard for MobileNetV3).
        `res_last_bn_init` is accepted but unused — kept for signature
        compatibility; only 'relu' and 'hard_swish' activations are
        handled, any other `act` silently falls through to plain BN.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        bn_name = name + '_bn'
        bn = fluid.layers.batch_norm(
            input=conv,
            param_attr=ParamAttr(
                name=bn_name + "_scale",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            bias_attr=ParamAttr(
                name=bn_name + "_offset",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        if if_act:
            if act == 'relu':
                bn = fluid.layers.relu(bn)
            elif act == 'hard_swish':
                bn = fluid.layers.hard_swish(bn)
        return bn

    def make_divisible(self, v, divisor=8, min_value=None):
        """Round `v` to the nearest multiple of `divisor`, never below
        `min_value` and never dropping more than ~10% below `v`
        (standard MobileNet channel rounding)."""
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    def se_block(self, input, num_out_filter, ratio=4, name=None):
        """Squeeze-and-excite: global pool -> 1x1 reduce (ReLU) ->
        1x1 expand (hard-sigmoid) -> channel-wise rescale."""
        num_mid_filter = num_out_filter // ratio
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        conv1 = fluid.layers.conv2d(
            input=pool,
            filter_size=1,
            num_filters=num_mid_filter,
            act='relu',
            param_attr=ParamAttr(name=name + '_1_weights'),
            bias_attr=ParamAttr(name=name + '_1_offset'))
        conv2 = fluid.layers.conv2d(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            act='hard_sigmoid',
            param_attr=ParamAttr(name=name + '_2_weights'),
            bias_attr=ParamAttr(name=name + '_2_offset'))
        # axis=0 broadcasts the (N, C, 1, 1) gate over H and W.
        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
        return scale

    def residual_unit(self,
                      input,
                      num_in_filter,
                      num_mid_filter,
                      num_out_filter,
                      stride,
                      filter_size,
                      act=None,
                      use_se=False,
                      name=None,
                      short=False):
        """MobileNetV3-style inverted residual: optional 1x1 expand
        (skipped when `short` is True), depthwise conv, optional SE,
        linear 1x1 projection; identity skip only when the unit
        preserves both channels and spatial size."""

        if not short:
            conv0 = self.conv_bn_layer(
                input=input,
                filter_size=1,
                num_filters=num_mid_filter,
                stride=1,
                padding=0,
                if_act=True,
                act=act,
                name=name + '_expand')
        else:
            conv0 = input

        conv1 = self.conv_bn_layer(
            input=conv0,
            filter_size=filter_size,
            num_filters=num_mid_filter,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            if_act=True,
            act=act,
            num_groups=num_mid_filter,
            use_cudnn=False,
            name=name + '_depthwise')
        if use_se:
            conv1 = self.se_block(
                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')

        conv2 = self.conv_bn_layer(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            stride=1,
            padding=0,
            if_act=False,
            name=name + '_linear',
            res_last_bn_init=True)
        if num_in_filter != num_out_filter or stride != 1:
            return conv2
        else:
            return fluid.layers.elementwise_add(x=input, y=conv2, act=None)


# NOTE(review): each factory below accepts `token` but immediately
# overwrites it with a searched preset, so the caller's value is ignored —
# confirm this is intended before relying on the parameter.
def SlimMobileNet_v1(token):
    token = [
        5, 3, 3, 7, 3, 3, 5, 7, 3, 3, 3, 3, 3, 3, 7, 3, 5, 3, 3, 3, 3, 3, 3, 6,
        3, 3, 3, 3, 4, 4, 4, 6, 4, 3, 4, 3, 6, 4, 3, 3, 2, 2, 2, 2, 4
    ]
    model = SlimMobileNet(model_name='large', scale=1.0, token=token)
    return model


def SlimMobileNet_v2(token):
    token = [
        5, 3, 5, 7, 3, 3, 7, 3, 5, 3, 3, 7, 3, 3, 3, 5, 5, 5, 3, 3, 3, 3, 4, 6,
        3, 3, 6, 3, 4, 4, 3, 4, 4, 4, 3, 6, 6, 4, 3, 3, 2, 2, 3, 2, 4
    ]
    model = SlimMobileNet(model_name='large', scale=1.0, token=token)
    return model


def SlimMobileNet_v3(token):
    token = [
        3, 3, 3, 3, 5, 3, 7, 7, 7, 3, 3, 7, 5, 3, 5, 7, 5, 3, 3, 3, 3, 3, 3, 3,
        3, 4, 3, 4, 3, 6, 4, 4, 4, 4, 6, 3, 6, 4, 6, 3, 2, 2, 3, 2, 4
    ]
    model = SlimMobileNet(model_name='large', scale=1.0, token=token)
    return model


def SlimMobileNet_v4(token):
    token = [
        3, 3, 3, 3, 5, 3, 3, 5, 7, 3, 5, 5, 5, 3, 3, 7, 3, 5, 3, 3, 3, 3, 4, 6,
        3, 4, 4, 6, 4, 6, 4, 6, 4, 6, 4, 4, 6, 6, 6, 4, 2, 3, 3, 3, 4
    ]
    model = SlimMobileNet(model_name='large', scale=1.0, token=token)
    return model


def SlimMobileNet_v5(token):
    token = [
        7, 7, 3, 5, 7, 3, 5, 3, 7, 5, 3, 3, 5, 3, 7, 5, 7, 7, 5, 3, 3, 3, 6, 3,
        4, 6, 3, 6, 6, 3, 6, 4, 6, 6, 4, 3, 6, 6, 6, 6, 4, 4, 4, 4, 4
    ]
    model = SlimMobileNet(model_name='large', scale=1.0, token=token)
    return model


if __name__ == "__main__":
    pass


# --- paddleslim/models/pd/slimfacenet.py ---
# ================================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import datetime
import numpy as np

import paddle
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr


class SlimFaceNet():
    """Searched MobileFaceNet-style face-recognition network.

    `arch` is a list of indices into a (kernel, expansion, SE) lookup
    table; `scale` in {1.0, 0.9, 0.75, 0.6} picks a channel preset.
    With the default extract-feature mode net() returns a 128-d
    embedding; otherwise it returns (ArcFace loss, accuracy) and a
    `label` variable is required.
    """

    def __init__(self, class_dim, scale=0.6, arch=None):

        assert arch is not None
        self.arch = arch
        self.class_dim = class_dim
        kernels = [3]
        expansions = [2, 4, 6]
        SE = [0, 1]
        # Enumerate every (kernel, expansion, se) combination; entries of
        # `arch` index into this table to pick one per residual unit.
        self.table = []
        for k in kernels:
            for e in expansions:
                for se in SE:
                    self.table.append((k, e, se))

        if scale == 1.0:
            # 100% - channel
            self.Slimfacenet_bottleneck_setting = [
                # t, c , n ,s
                [2, 64, 5, 2],
                [4, 128, 1, 2],
                [2, 128, 6, 1],
                [4, 128, 1, 2],
                [2, 128, 2, 1]
            ]
        elif scale == 0.9:
            # 90% - channel
            self.Slimfacenet_bottleneck_setting = [
                # t, c , n ,s
                [2, 56, 5, 2],
                [4, 116, 1, 2],
                [2, 116, 6, 1],
                [4, 116, 1, 2],
                [2, 116, 2, 1]
            ]
        elif scale == 0.75:
            # 75% - channel
            self.Slimfacenet_bottleneck_setting = [
                # t, c , n ,s
                [2, 48, 5, 2],
                [4, 96, 1, 2],
                [2, 96, 6, 1],
                [4, 96, 1, 2],
                [2, 96, 2, 1]
            ]
        elif scale == 0.6:
            # 60% - channel
            self.Slimfacenet_bottleneck_setting = [
                # t, c , n ,s
                [2, 40, 5, 2],
                [4, 76, 1, 2],
                [2, 76, 6, 1],
                [4, 76, 1, 2],
                [2, 76, 2, 1]
            ]
        else:
            # NOTE(review): terminates the whole process on an unknown
            # scale instead of raising — consider ValueError.
            print('WRONG scale')
            exit()
        self.extract_feature = True

    def set_extract_feature_flag(self, flag):
        """Toggle between embedding output (True) and loss/acc output."""
        self.extract_feature = flag

    def net(self, input, label=None):
        """Build the forward graph.

        Returns the 128-d embedding when extract_feature is True,
        otherwise (loss, accuracy) computed with the ArcFace margin head.
        """
        # Stem: full 3x3 stride-2 conv followed by a depthwise 3x3.
        x = self.conv_bn_layer(
            input,
            filter_size=3,
            num_filters=64,
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            name='conv3x3')
        x = self.conv_bn_layer(
            x,
            filter_size=3,
            num_filters=64,
            stride=1,
            padding=1,
            num_groups=64,
            if_act=True,
            name='dw_conv3x3')

        in_c = 64
        cnt = 0
        for _exp, out_c, times, _stride in self.Slimfacenet_bottleneck_setting:
            for i in range(times):
                # Only the first unit of each group may downsample.
                stride = _stride if i == 0 else 1
                # Per-unit searched choice of kernel/expansion/SE;
                # the preset expansion `_exp` is superseded by the
                # searched `exp` from the table.
                filter_size, exp, se = self.table[self.arch[cnt]]
                se = False if se == 0 else True
                x = self.residual_unit(
                    x,
                    num_in_filter=in_c,
                    num_out_filter=out_c,
                    stride=stride,
                    filter_size=filter_size,
                    expansion_factor=exp,
                    use_se=se,
                    name='residual_unit' + str(cnt + 1))
                cnt += 1
                in_c = out_c

        # Head: 1x1 expand, global depthwise (7, 6) conv (assumes a
        # 112x96-style input leaving a 7x6 map — TODO confirm), then a
        # linear 1x1 conv down to the 128-d embedding.
        out_c = 512
        x = self.conv_bn_layer(
            x,
            filter_size=1,
            num_filters=out_c,
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            name='conv1x1')
        x = self.conv_bn_layer(
            x,
            filter_size=(7, 6),
            num_filters=out_c,
            stride=1,
            padding=0,
            num_groups=out_c,
            if_act=False,
            name='global_dw_conv7x7')
        x = fluid.layers.conv2d(
            x,
            num_filters=128,
            filter_size=1,
            stride=1,
            padding=0,
            groups=1,
            act=None,
            use_cudnn=True,
            param_attr=ParamAttr(
                name='linear_conv1x1_weights',
                initializer=MSRA(),
                regularizer=fluid.regularizer.L2Decay(4e-4)),
            bias_attr=False)
        bn_name = 'linear_conv1x1_bn'
        x = fluid.layers.batch_norm(
            x,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

        # Flatten (N, 128, 1, 1) -> (N, 128).
        x = fluid.layers.reshape(x, shape=[x.shape[0], x.shape[1]])

        if self.extract_feature:
            return x

        out = self.arc_margin_product(
            x, label, self.class_dim, s=32.0, m=0.50, mode=2)
        softmax = fluid.layers.softmax(input=out)
        cost = fluid.layers.cross_entropy(input=softmax, label=label)
        loss = fluid.layers.mean(x=cost)
        acc = fluid.layers.accuracy(input=out, label=label, k=1)
        return loss, acc

    def residual_unit(self,
                      input,
                      num_in_filter,
                      num_out_filter,
                      stride,
                      filter_size,
                      expansion_factor,
                      use_se=False,
                      name=None):
        """Inverted residual: 1x1 expand -> depthwise -> optional SE ->
        linear 1x1; identity skip only when shapes are preserved."""

        num_expfilter = int(round(num_in_filter * expansion_factor))
        input_data = input

        expand_conv = self.conv_bn_layer(
            input=input,
            filter_size=1,
            num_filters=num_expfilter,
            stride=1,
            padding=0,
            if_act=True,
            name=name + '_expand')

        depthwise_conv = self.conv_bn_layer(
            input=expand_conv,
            filter_size=filter_size,
            num_filters=num_expfilter,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            if_act=True,
            num_groups=num_expfilter,
            use_cudnn=True,
            name=name + '_depthwise')

        if use_se:
            depthwise_conv = self.se_block(
                input=depthwise_conv,
                num_out_filter=num_expfilter,
                name=name + '_se')

        linear_conv = self.conv_bn_layer(
            input=depthwise_conv,
            filter_size=1,
            num_filters=num_out_filter,
            stride=1,
            padding=0,
            if_act=False,
            name=name + '_linear')
        if num_in_filter != num_out_filter or stride != 1:
            return linear_conv
        else:
            return fluid.layers.elementwise_add(
                x=input_data, y=linear_conv, act=None)

    def se_block(self, input, num_out_filter, ratio=4, name=None):
        """Squeeze-and-excite with PReLU in the reduce path and a
        hard-sigmoid gate."""
        num_mid_filter = int(num_out_filter // ratio)
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        conv1 = fluid.layers.conv2d(
            input=pool,
            filter_size=1,
            num_filters=num_mid_filter,
            act=None,
            param_attr=ParamAttr(name=name + '_1_weights'),
            bias_attr=ParamAttr(name=name + '_1_offset'))
        conv1 = fluid.layers.prelu(
            conv1,
            mode='channel',
            param_attr=ParamAttr(
                name=name + '_prelu',
                regularizer=fluid.regularizer.L2Decay(0.0)))
        conv2 = fluid.layers.conv2d(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            act='hard_sigmoid',
            param_attr=ParamAttr(name=name + '_2_weights'),
            bias_attr=ParamAttr(name=name + '_2_offset'))
        # axis=0 broadcasts the (N, C, 1, 1) gate over H and W.
        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
        return scale

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      num_groups=1,
                      if_act=True,
                      name=None,
                      use_cudnn=True):
        """conv2d (MSRA init, no bias) + batch_norm, optionally followed
        by channel-wise PReLU (this network's activation of choice)."""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(
                name=name + '_weights', initializer=MSRA()),
            bias_attr=False)
        bn_name = name + '_bn'
        bn = fluid.layers.batch_norm(
            input=conv,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        if if_act:
            return fluid.layers.prelu(
                bn,
                mode='channel',
                param_attr=ParamAttr(
                    name=name + '_prelu',
                    regularizer=fluid.regularizer.L2Decay(0.0)))
        else:
            return bn

    def arc_margin_product(self, input, label, out_dim, s=32.0, m=0.50,
                           mode=2):
        """ArcFace additive angular-margin head.

        L2-normalizes both embeddings and class weights so their product
        is cos(theta), then replaces the target-class logit with
        cos(theta + m) and rescales by `s`. mode=1 uses the CosFace-style
        guard (phi only while cos > 0); mode=2 uses the ArcFace
        hard-example correction (cos - mm once past the th threshold);
        any other mode applies no correction.
        """
        # Row-normalize the embeddings.
        input_norm = fluid.layers.sqrt(
            fluid.layers.reduce_sum(
                fluid.layers.square(input), dim=1))
        input = fluid.layers.elementwise_div(input, input_norm, axis=0)

        weight = fluid.layers.create_parameter(
            shape=[out_dim, input.shape[1]],
            dtype='float32',
            name='weight_norm',
            attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Xavier(),
                regularizer=fluid.regularizer.L2Decay(4e-4)))

        # Row-normalize the class-weight matrix, then cosine = X . W^T.
        weight_norm = fluid.layers.sqrt(
            fluid.layers.reduce_sum(
                fluid.layers.square(weight), dim=1))
        weight = fluid.layers.elementwise_div(weight, weight_norm, axis=0)
        weight = fluid.layers.transpose(weight, perm=[1, 0])
        cosine = fluid.layers.mul(input, weight)
        sine = fluid.layers.sqrt(1.0 - fluid.layers.square(cosine))

        # cos(theta + m) = cos*cos(m) - sin*sin(m)
        cos_m = math.cos(m)
        sin_m = math.sin(m)
        phi = cosine * cos_m - sine * sin_m

        th = math.cos(math.pi - m)
        mm = math.sin(math.pi - m) * m

        if mode == 1:
            phi = self.paddle_where_more_than(cosine, 0, phi, cosine)
        elif mode == 2:
            phi = self.paddle_where_more_than(cosine, th, phi, cosine - mm)
        else:
            pass

        # Apply the margin only to the target class.
        one_hot = fluid.one_hot(input=label, depth=out_dim)
        output = fluid.layers.elementwise_mul(
            one_hot, phi) + fluid.layers.elementwise_mul(
                (1.0 - one_hot), cosine)
        output = output * s
        return output

    def paddle_where_more_than(self, target, limit, x, y):
        """Element-wise select: x where target > limit, else y
        (fluid had no native `where` at the time)."""
        mask = fluid.layers.cast(x=(target > limit), dtype='float32')
        output = fluid.layers.elementwise_mul(
            mask, x) + fluid.layers.elementwise_mul((1.0 - mask), y)
        return output
SlimFaceNet_A_x0_60(class_dim=None, scale=0.6, arch=None): + scale = 0.6 + arch = [0, 1, 5, 1, 0, 2, 1, 2, 0, 1, 2, 1, 1, 0, 1] + return SlimFaceNet(class_dim=class_dim, scale=scale, arch=arch) + + +def SlimFaceNet_B_x0_75(class_dim=None, scale=0.6, arch=None): + scale = 0.75 + arch = [1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 3, 2, 2, 3] + return SlimFaceNet(class_dim=class_dim, scale=scale, arch=arch) + + +def SlimFaceNet_C_x0_75(class_dim=None, scale=0.6, arch=None): + scale = 0.75 + arch = [1, 3, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 5, 5, 5] + return SlimFaceNet(class_dim=class_dim, scale=scale, arch=arch) + + +if __name__ == "__main__": + x = fluid.data(name='x', shape=[-1, 3, 112, 112], dtype='float32') + print(x.shape) + model = SlimFaceNet(10000, [1, 3, 3, 1, 1, 0, 0, 1, 0, 1, 1, 0, 5, 5, 3]) + y = model.net(x) diff --git a/paddleslim/models/pd/util.py b/paddleslim/models/pd/util.py new file mode 100644 index 00000000..3255acc5 --- /dev/null +++ b/paddleslim/models/pd/util.py @@ -0,0 +1,32 @@ +from __future__ import absolute_import +import paddle.fluid as fluid +from ..models import classification_models + +__all__ = ["image_classification"] + +model_list = classification_models.model_list + + +def image_classification(model, image_shape, class_num, use_gpu=False): + assert model in model_list + train_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + image = fluid.layers.data( + name='image', shape=image_shape, dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + model = classification_models.__dict__[model]() + out = model.net(input=image, class_dim=class_num) + cost = fluid.layers.cross_entropy(input=out, label=label) + avg_cost = fluid.layers.mean(x=cost) + acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) + acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) + val_program = fluid.default_main_program().clone(for_test=True) + + opt = 
fluid.optimizer.Momentum(0.1, 0.9) + opt.minimize(avg_cost) + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + return exe, train_program, val_program, (image, label), ( + acc_top1.name, acc_top5.name, avg_cost.name) -- GitLab