Merge pull request #259 from jacquesqiao/draw-fit-aline

add draw line in 01.fit-a-line

Merge pull request #259 from jacquesqiao/draw-fit-aline
add draw line in 01.fit-a-line
86e22929 · jacquesqiao · GitHub · ceb5c99f · c4e36445 · 86e22929
7 changed files
--- a/01.fit_a_line/README.en.ipynb
+++ b/01.fit_a_line/README.en.ipynb
@@ -308,19 +308,41 @@
        "editable": true
      },
      "source": [
-        "# event_handler to print training and testing info\n",
+        "import matplotlib.pyplot as plt\n",
+        "from IPython import display\n",
+        "import cPickle\n",
+        "\n",
+        "step=0\n",
+        "\n",
+        "train_costs=[],[]\n",
+        "test_costs=[],[]\n",
+        "\n",
        "def event_handler(event):\n",
+        "    global step\n",
+        "    global train_costs\n",
+        "    global test_costs\n",
        "    if isinstance(event, paddle.event.EndIteration):\n",
-        "        if event.batch_id % 100 == 0:\n",
-        "            print \"Pass %d, Batch %d, Cost %f\" % (\n",
-        "                event.pass_id, event.batch_id, event.cost)\n",
-        "\n",
-        "    if isinstance(event, paddle.event.EndPass):\n",
-        "        result = trainer.test(\n",
-        "            reader=paddle.batch(\n",
-        "                uci_housing.test(), batch_size=2),\n",
-        "            feeding=feeding)\n",
-        "        print \"Test %d, Cost %f\" % (event.pass_id, result.cost)\n"
+        "        need_plot = False\n",
+        "        if step % 10 == 0:  # every 10 batches, record a train cost\n",
+        "            train_costs[0].append(step)\n",
+        "            train_costs[1].append(event.cost)\n",
+        "\n",
+        "        if step % 1000 == 0: # every 1000 batches, record a test cost\n",
+        "            result = trainer.test(\n",
+        "                reader=paddle.batch(\n",
+        "                    uci_housing.test(), batch_size=2),\n",
+        "                feeding=feeding)\n",
+        "            test_costs[0].append(step)\n",
+        "            test_costs[1].append(result.cost)\n",
+        "\n",
+        "        if step % 100 == 0: # every 100 batches, update cost plot\n",
+        "            plt.plot(*train_costs)\n",
+        "            plt.plot(*test_costs)\n",
+        "            plt.legend(['Train Cost', 'Test Cost'], loc='upper left')\n",
+        "            display.clear_output(wait=True)\n",
+        "            display.display(plt.gcf())\n",
+        "            plt.gcf().clear()\n",
+        "        step += 1\n"
      ],
      "outputs": [
        {
@@ -372,6 +394,8 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
+        "\n",
+        "![png](./image/train-and-test.png)\n",
        "\n",
        "## Summary\n",
        "This chapter introduces *Linear Regression* and how to train and test this model with PaddlePaddle, using the UCI Housing Data Set. Because a large number of more complex models and techniques are derived from linear regression, it is important to understand its underlying theory and limitation.\n",

--- a/01.fit_a_line/README.en.md
+++ b/01.fit_a_line/README.en.md
@@ -163,19 +163,41 @@ feeding={'x': 0, 'y': 1}
 Moreover, an event handler is provided to plot the training and test costs as training progresses:

 ```python
-# event_handler to print training and testing info
+import matplotlib.pyplot as plt
+from IPython import display
+import cPickle
+
+step=0
+
+train_costs=[],[]
+test_costs=[],[]
+
 def event_handler(event):
+    global step
+    global train_costs
+    global test_costs
    if isinstance(event, paddle.event.EndIteration):
-        if event.batch_id % 100 == 0:
-            print "Pass %d, Batch %d, Cost %f" % (
-                event.pass_id, event.batch_id, event.cost)
-
-    if isinstance(event, paddle.event.EndPass):
-        result = trainer.test(
-            reader=paddle.batch(
-                uci_housing.test(), batch_size=2),
-            feeding=feeding)
-        print "Test %d, Cost %f" % (event.pass_id, result.cost)
+        need_plot = False
+        if step % 10 == 0:  # every 10 batches, record a train cost
+            train_costs[0].append(step)
+            train_costs[1].append(event.cost)
+
+        if step % 1000 == 0: # every 1000 batches, record a test cost
+            result = trainer.test(
+                reader=paddle.batch(
+                    uci_housing.test(), batch_size=2),
+                feeding=feeding)
+            test_costs[0].append(step)
+            test_costs[1].append(result.cost)
+
+        if step % 100 == 0: # every 100 batches, update cost plot
+            plt.plot(*train_costs)
+            plt.plot(*test_costs)
+            plt.legend(['Train Cost', 'Test Cost'], loc='upper left')
+            display.clear_output(wait=True)
+            display.display(plt.gcf())
+            plt.gcf().clear()
+        step += 1
 ```

 ### Start Training
@@ -191,6 +213,8 @@ trainer.train(
    num_passes=30)
 ```

+![png](./image/train-and-test.png)
+
 ## Summary
 This chapter introduces *Linear Regression* and how to train and test this model with PaddlePaddle, using the UCI Housing Data Set. Because a large number of more complex models and techniques are derived from linear regression, it is important to understand its underlying theory and limitations.


--- a/01.fit_a_line/README.ipynb
+++ b/01.fit_a_line/README.ipynb
@@ -35,7 +35,7 @@
        "\n",
        "$\\hat{Y}$ 表示模型的预测结果，用来和真实值$Y$区分。模型要学习的参数即：$\\omega_1, \\ldots, \\omega_{13}, b$。\n",
        "\n",
-        "建立模型后，我们需要给模型一个优化目标，使得学到的参数能够让预测值$\\hat{Y}$尽可能地接近真实值$Y$。这里我们引入损失函数（[Loss Function](https://en.wikipedia.org/wiki/Loss_function)，或Cost Function）这个概念。 输入任意一个数据样本的目标值$y_{i}$和模型给出的预测值$\\hat{y_{i}}$，损失函数输出一个非负的实值。这个实质通常用来反映模型误差的大小。\n",
+        "建立模型后，我们需要给模型一个优化目标，使得学到的参数能够让预测值$\\hat{Y}$尽可能地接近真实值$Y$。这里我们引入损失函数（[Loss Function](https://en.wikipedia.org/wiki/Loss_function)，或Cost Function）这个概念。 输入任意一个数据样本的目标值$y_{i}$和模型给出的预测值$\\hat{y_{i}}$，损失函数输出一个非负的实值。这个实值通常用来反映模型误差的大小。\n",
        "\n",
        "对于线性回归模型来讲，最常见的损失函数就是均方误差（Mean Squared Error， [MSE](https://en.wikipedia.org/wiki/Mean_squared_error)）了，它的形式是：\n",
        "\n",
@@ -304,18 +304,41 @@
      },
      "source": [
        "# event_handler to print training and testing info\n",
+        "import matplotlib.pyplot as plt\n",
+        "from IPython import display\n",
+        "import cPickle\n",
+        "\n",
+        "step=0\n",
+        "\n",
+        "train_costs=[],[]\n",
+        "test_costs=[],[]\n",
+        "\n",
        "def event_handler(event):\n",
+        "    global step\n",
+        "    global train_costs\n",
+        "    global test_costs\n",
        "    if isinstance(event, paddle.event.EndIteration):\n",
-        "        if event.batch_id % 100 == 0:\n",
-        "            print \"Pass %d, Batch %d, Cost %f\" % (\n",
-        "                event.pass_id, event.batch_id, event.cost)\n",
-        "\n",
-        "    if isinstance(event, paddle.event.EndPass):\n",
-        "        result = trainer.test(\n",
-        "            reader=paddle.batch(\n",
-        "                uci_housing.test(), batch_size=2),\n",
-        "            feeding=feeding)\n",
-        "        print \"Test %d, Cost %f\" % (event.pass_id, result.cost)\n"
+        "        need_plot = False\n",
+        "        if step % 10 == 0:  # every 10 batches, record a train cost\n",
+        "            train_costs[0].append(step)\n",
+        "            train_costs[1].append(event.cost)\n",
+        "\n",
+        "        if step % 1000 == 0: # every 1000 batches, record a test cost\n",
+        "            result = trainer.test(\n",
+        "                reader=paddle.batch(\n",
+        "                    uci_housing.test(), batch_size=2),\n",
+        "                feeding=feeding)\n",
+        "            test_costs[0].append(step)\n",
+        "            test_costs[1].append(result.cost)\n",
+        "\n",
+        "        if step % 100 == 0: # every 100 batches, update cost plot\n",
+        "            plt.plot(*train_costs)\n",
+        "            plt.plot(*test_costs)\n",
+        "            plt.legend(['Train Cost', 'Test Cost'], loc='upper left')\n",
+        "            display.clear_output(wait=True)\n",
+        "            display.display(plt.gcf())\n",
+        "            plt.gcf().clear()\n",
+        "        step += 1\n"
      ],
      "outputs": [
        {
@@ -367,6 +390,8 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
+        "\n",
+        "![png](./image/train-and-test.png)\n",
        "\n",
        "## 总结\n",
        "在这章里，我们借助波士顿房价这一数据集，介绍了线性回归模型的基本概念，以及如何使用PaddlePaddle实现训练和测试的过程。很多的模型和技巧都是从简单的线性回归模型演化而来，因此弄清楚线性模型的原理和局限非常重要。\n",

--- a/01.fit_a_line/README.md
+++ b/01.fit_a_line/README.md
@@ -159,18 +159,41 @@ feeding={'x': 0, 'y': 1}

 ```python
 # event_handler to print training and testing info
+import matplotlib.pyplot as plt
+from IPython import display
+import cPickle
+
+step=0
+
+train_costs=[],[]
+test_costs=[],[]
+
 def event_handler(event):
+    global step
+    global train_costs
+    global test_costs
    if isinstance(event, paddle.event.EndIteration):
-        if event.batch_id % 100 == 0:
-            print "Pass %d, Batch %d, Cost %f" % (
-                event.pass_id, event.batch_id, event.cost)
-
-    if isinstance(event, paddle.event.EndPass):
-        result = trainer.test(
-            reader=paddle.batch(
-                uci_housing.test(), batch_size=2),
-            feeding=feeding)
-        print "Test %d, Cost %f" % (event.pass_id, result.cost)
+        need_plot = False
+        if step % 10 == 0:  # every 10 batches, record a train cost
+            train_costs[0].append(step)
+            train_costs[1].append(event.cost)
+
+        if step % 1000 == 0: # every 1000 batches, record a test cost
+            result = trainer.test(
+                reader=paddle.batch(
+                    uci_housing.test(), batch_size=2),
+                feeding=feeding)
+            test_costs[0].append(step)
+            test_costs[1].append(result.cost)
+
+        if step % 100 == 0: # every 100 batches, update cost plot
+            plt.plot(*train_costs)
+            plt.plot(*test_costs)
+            plt.legend(['Train Cost', 'Test Cost'], loc='upper left')
+            display.clear_output(wait=True)
+            display.display(plt.gcf())
+            plt.gcf().clear()
+        step += 1
 ```

 ### 开始训练
@@ -186,6 +209,8 @@ trainer.train(
    num_passes=30)
 ```

+![png](./image/train-and-test.png)
+
 ## 总结
 在这章里，我们借助波士顿房价这一数据集，介绍了线性回归模型的基本概念，以及如何使用PaddlePaddle实现训练和测试的过程。很多的模型和技巧都是从简单的线性回归模型演化而来，因此弄清楚线性模型的原理和局限非常重要。


--- a/01.fit_a_line/image/train-and-test.png
+++ b/01.fit_a_line/image/train-and-test.png
--- a/01.fit_a_line/index.en.html
+++ b/01.fit_a_line/index.en.html
@@ -205,19 +205,41 @@ feeding={'x': 0, 'y': 1}
 Moreover, an event handler is provided to plot the training and test costs as training progresses:

 ```python
-# event_handler to print training and testing info
+import matplotlib.pyplot as plt
+from IPython import display
+import cPickle
+
+step=0
+
+train_costs=[],[]
+test_costs=[],[]
+
 def event_handler(event):
+    global step
+    global train_costs
+    global test_costs
    if isinstance(event, paddle.event.EndIteration):
-        if event.batch_id % 100 == 0:
-            print "Pass %d, Batch %d, Cost %f" % (
-                event.pass_id, event.batch_id, event.cost)
-
-    if isinstance(event, paddle.event.EndPass):
-        result = trainer.test(
-            reader=paddle.batch(
-                uci_housing.test(), batch_size=2),
-            feeding=feeding)
-        print "Test %d, Cost %f" % (event.pass_id, result.cost)
+        need_plot = False
+        if step % 10 == 0:  # every 10 batches, record a train cost
+            train_costs[0].append(step)
+            train_costs[1].append(event.cost)
+
+        if step % 1000 == 0: # every 1000 batches, record a test cost
+            result = trainer.test(
+                reader=paddle.batch(
+                    uci_housing.test(), batch_size=2),
+                feeding=feeding)
+            test_costs[0].append(step)
+            test_costs[1].append(result.cost)
+
+        if step % 100 == 0: # every 100 batches, update cost plot
+            plt.plot(*train_costs)
+            plt.plot(*test_costs)
+            plt.legend(['Train Cost', 'Test Cost'], loc='upper left')
+            display.clear_output(wait=True)
+            display.display(plt.gcf())
+            plt.gcf().clear()
+        step += 1
 ```

 ### Start Training
@@ -233,6 +255,8 @@ trainer.train(
    num_passes=30)
 ```

+![png](./image/train-and-test.png)
+
 ## Summary
 This chapter introduces *Linear Regression* and how to train and test this model with PaddlePaddle, using the UCI Housing Data Set. Because a large number of more complex models and techniques are derived from linear regression, it is important to understand its underlying theory and limitations.


--- a/01.fit_a_line/index.html
+++ b/01.fit_a_line/index.html
@@ -201,18 +201,41 @@ feeding={'x': 0, 'y': 1}

 ```python
 # event_handler to print training and testing info
+import matplotlib.pyplot as plt
+from IPython import display
+import cPickle
+
+step=0
+
+train_costs=[],[]
+test_costs=[],[]
+
 def event_handler(event):
+    global step
+    global train_costs
+    global test_costs
    if isinstance(event, paddle.event.EndIteration):
-        if event.batch_id % 100 == 0:
-            print "Pass %d, Batch %d, Cost %f" % (
-                event.pass_id, event.batch_id, event.cost)
-
-    if isinstance(event, paddle.event.EndPass):
-        result = trainer.test(
-            reader=paddle.batch(
-                uci_housing.test(), batch_size=2),
-            feeding=feeding)
-        print "Test %d, Cost %f" % (event.pass_id, result.cost)
+        need_plot = False
+        if step % 10 == 0:  # every 10 batches, record a train cost
+            train_costs[0].append(step)
+            train_costs[1].append(event.cost)
+
+        if step % 1000 == 0: # every 1000 batches, record a test cost
+            result = trainer.test(
+                reader=paddle.batch(
+                    uci_housing.test(), batch_size=2),
+                feeding=feeding)
+            test_costs[0].append(step)
+            test_costs[1].append(result.cost)
+
+        if step % 100 == 0: # every 100 batches, update cost plot
+            plt.plot(*train_costs)
+            plt.plot(*test_costs)
+            plt.legend(['Train Cost', 'Test Cost'], loc='upper left')
+            display.clear_output(wait=True)
+            display.display(plt.gcf())
+            plt.gcf().clear()
+        step += 1
 ```

 ### 开始训练
@@ -228,6 +251,8 @@ trainer.train(
    num_passes=30)
 ```

+![png](./image/train-and-test.png)
+
 ## 总结
 在这章里，我们借助波士顿房价这一数据集，介绍了线性回归模型的基本概念，以及如何使用PaddlePaddle实现训练和测试的过程。很多的模型和技巧都是从简单的线性回归模型演化而来，因此弄清楚线性模型的原理和局限非常重要。