finish code 5.10

0c3397f5 · shusentang · f6f44807 · 0c3397f5
隐藏空白更改
内联并排

Showing 1 changed file with 88 additions and 36 deletions

code/chapter05_CNN/5.10_batch-norm.ipynb code/chapter05_CNN/5.10_batch-norm.ipynb +88 -36

未找到文件。
--- a/code/chapter05_CNN/5.10_batch-norm.ipynb
+++ b/code/chapter05_CNN/5.10_batch-norm.ipynb
 {
 "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 5.10 批量归一化"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 1,
@@ -29,6 +36,13 @@
    "print(device)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5.10.2 从零开始实现"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 2,
@@ -64,28 +78,6 @@
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "True"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "x = torch.tensor(1.0, device=device)\n",
-    "y = torch.tensor(1.0).to(x.device)\n",
-    "x.device == y.device"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
   "outputs": [],
   "source": [
    "class BatchNorm(nn.Module):\n",
@@ -107,16 +99,23 @@
    "        if self.moving_mean.device != X.device:\n",
    "            self.moving_mean = self.moving_mean.to(X.device)\n",
    "            self.moving_var = self.moving_var.to(X.device)\n",
-    "        # 保存更新过的moving_mean和moving_var\n",
+    "        # 保存更新过的moving_mean和moving_var, Module实例的training属性默认为True, 调用.eval()后设成False\n",
    "        Y, self.moving_mean, self.moving_var = batch_norm(self.training, \n",
    "            X, self.gamma, self.beta, self.moving_mean,\n",
    "            self.moving_var, eps=1e-5, momentum=0.9)\n",
    "        return Y"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5.10.2.1 使用批量归一化层的LeNet"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -142,7 +141,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
@@ -150,11 +149,11 @@
     "output_type": "stream",
     "text": [
      "training on  cuda\n",
-      "epoch 1, loss 0.0039, train acc 0.787, test acc 0.825, time 4.6 sec\n",
-      "epoch 2, loss 0.0018, train acc 0.865, test acc 0.837, time 2.6 sec\n",
-      "epoch 3, loss 0.0014, train acc 0.880, test acc 0.807, time 2.6 sec\n",
-      "epoch 4, loss 0.0013, train acc 0.887, test acc 0.860, time 2.6 sec\n",
-      "epoch 5, loss 0.0012, train acc 0.895, test acc 0.844, time 2.5 sec\n"
+      "epoch 1, loss 0.0039, train acc 0.790, test acc 0.835, time 2.9 sec\n",
+      "epoch 2, loss 0.0018, train acc 0.866, test acc 0.821, time 3.2 sec\n",
+      "epoch 3, loss 0.0014, train acc 0.879, test acc 0.857, time 2.6 sec\n",
+      "epoch 4, loss 0.0013, train acc 0.886, test acc 0.820, time 2.7 sec\n",
+      "epoch 5, loss 0.0012, train acc 0.891, test acc 0.859, time 2.8 sec\n"
     ]
    }
   ],
@@ -169,17 +168,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "(tensor([ 1.0408,  1.2496,  0.9876,  0.9680,  1.1117,  0.9562], device='cuda:0'),\n",
-       " tensor([-0.5720,  0.1018, -0.5304, -0.5216,  0.3563, -0.1280], device='cuda:0'))"
+       "(tensor([ 1.2537,  1.2284,  1.0100,  1.0171,  0.9809,  1.1870], device='cuda:0'),\n",
+       " tensor([ 0.0962,  0.3299, -0.5506,  0.1522, -0.1556,  0.2240], device='cuda:0'))"
      ]
     },
-     "execution_count": 7,
+     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -188,12 +187,65 @@
    "net[1].gamma.view((-1,)), net[1].beta.view((-1,))"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5.10.3 简洁实现"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
-   "source": []
+   "source": [
+    "net = nn.Sequential(\n",
+    "            nn.Conv2d(1, 6, 5), # in_channels, out_channels, kernel_size\n",
+    "            nn.BatchNorm2d(6),\n",
+    "            nn.Sigmoid(),\n",
+    "            nn.MaxPool2d(2, 2), # kernel_size, stride\n",
+    "            nn.Conv2d(6, 16, 5),\n",
+    "            nn.BatchNorm2d(16),\n",
+    "            nn.Sigmoid(),\n",
+    "            nn.MaxPool2d(2, 2),\n",
+    "            d2l.FlattenLayer(),\n",
+    "            nn.Linear(16*4*4, 120),\n",
+    "            nn.BatchNorm1d(120),\n",
+    "            nn.Sigmoid(),\n",
+    "            nn.Linear(120, 84),\n",
+    "            nn.BatchNorm1d(84),\n",
+    "            nn.Sigmoid(),\n",
+    "            nn.Linear(84, 10)\n",
+    "        )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "training on  cuda\n",
+      "epoch 1, loss 0.0054, train acc 0.767, test acc 0.795, time 2.0 sec\n",
+      "epoch 2, loss 0.0024, train acc 0.851, test acc 0.748, time 2.0 sec\n",
+      "epoch 3, loss 0.0017, train acc 0.872, test acc 0.814, time 2.2 sec\n",
+      "epoch 4, loss 0.0014, train acc 0.883, test acc 0.818, time 2.1 sec\n",
+      "epoch 5, loss 0.0013, train acc 0.889, test acc 0.734, time 1.8 sec\n"
+     ]
+    }
+   ],
+   "source": [
+    "batch_size = 256\n",
+    "train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)\n",
+    "\n",
+    "lr, num_epochs = 0.001, 5\n",
+    "optimizer = torch.optim.Adam(net.parameters(), lr=lr)\n",
+    "d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)"
+   ]
  }
 ],
 "metadata": {