diff --git a/01.fit_a_line/README.en.ipynb b/01.fit_a_line/README.en.ipynb index 240546a32a47f6bd6a837a636d1e7a728f00d861..a3f95c5ca96a7e6149c474d2ebb2c1d958bbe546 100644 --- a/01.fit_a_line/README.en.ipynb +++ b/01.fit_a_line/README.en.ipynb @@ -308,19 +308,41 @@ "editable": true }, "source": [ - "# event_handler to print training and testing info\n", + "import matplotlib.pyplot as plt\n", + "from IPython import display\n", + "import cPickle\n", + "\n", + "step=0\n", + "\n", + "train_costs=[],[]\n", + "test_costs=[],[]\n", + "\n", "def event_handler(event):\n", + " global step\n", + " global train_costs\n", + " global test_costs\n", " if isinstance(event, paddle.event.EndIteration):\n", - " if event.batch_id % 100 == 0:\n", - " print \"Pass %d, Batch %d, Cost %f\" % (\n", - " event.pass_id, event.batch_id, event.cost)\n", - "\n", - " if isinstance(event, paddle.event.EndPass):\n", - " result = trainer.test(\n", - " reader=paddle.batch(\n", - " uci_housing.test(), batch_size=2),\n", - " feeding=feeding)\n", - " print \"Test %d, Cost %f\" % (event.pass_id, result.cost)\n" + " need_plot = False\n", + " if step % 10 == 0: # every 10 batches, record a train cost\n", + " train_costs[0].append(step)\n", + " train_costs[1].append(event.cost)\n", + "\n", + " if step % 1000 == 0: # every 1000 batches, record a test cost\n", + " result = trainer.test(\n", + " reader=paddle.batch(\n", + " uci_housing.test(), batch_size=2),\n", + " feeding=feeding)\n", + " test_costs[0].append(step)\n", + " test_costs[1].append(result.cost)\n", + "\n", + " if step % 100 == 0: # every 100 batches, update cost plot\n", + " plt.plot(*train_costs)\n", + " plt.plot(*test_costs)\n", + " plt.legend(['Train Cost', 'Test Cost'], loc='upper left')\n", + " display.clear_output(wait=True)\n", + " display.display(plt.gcf())\n", + " plt.gcf().clear()\n", + " step += 1\n" ], "outputs": [ { @@ -372,6 +394,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "\n", + "![png](./image/train-and-test.png)\n", "\n", "## Summary\n", "This chapter introduces *Linear Regression* and how to train and test this model with PaddlePaddle, using the UCI Housing Data Set. Because a large number of more complex models and techniques are derived from linear regression, it is important to understand its underlying theory and limitation.\n", diff --git a/01.fit_a_line/README.en.md b/01.fit_a_line/README.en.md index 4c9b3859f809722e73d05ce18ade5a9a0305ecd3..7f42b7da50adec54a779d388cec6572177dd88f3 100644 --- a/01.fit_a_line/README.en.md +++ b/01.fit_a_line/README.en.md @@ -163,19 +163,41 @@ feeding={'x': 0, 'y': 1} Moreover, an event handler is provided to print the training progress: ```python -# event_handler to print training and testing info +import matplotlib.pyplot as plt +from IPython import display +import cPickle + +step=0 + +train_costs=[],[] +test_costs=[],[] + def event_handler(event): + global step + global train_costs + global test_costs if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f" % ( - event.pass_id, event.batch_id, event.cost) - - if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=paddle.batch( - uci_housing.test(), batch_size=2), - feeding=feeding) - print "Test %d, Cost %f" % (event.pass_id, result.cost) + need_plot = False + if step % 10 == 0: # every 10 batches, record a train cost + train_costs[0].append(step) + train_costs[1].append(event.cost) + + if step % 1000 == 0: # every 1000 batches, record a test cost + result = trainer.test( + reader=paddle.batch( + uci_housing.test(), batch_size=2), + feeding=feeding) + test_costs[0].append(step) + test_costs[1].append(result.cost) + + if step % 100 == 0: # every 100 batches, update cost plot + plt.plot(*train_costs) + plt.plot(*test_costs) + plt.legend(['Train Cost', 'Test Cost'], loc='upper left') + display.clear_output(wait=True) + display.display(plt.gcf()) + plt.gcf().clear() + step += 1 ``` ### Start Training @@ -191,6 +213,8 @@ trainer.train( num_passes=30) ``` +![png](./image/train-and-test.png) + ## Summary This chapter introduces *Linear Regression* and how to train and test this model with PaddlePaddle, using the UCI Housing Data Set. Because a large number of more complex models and techniques are derived from linear regression, it is important to understand its underlying theory and limitation. diff --git a/01.fit_a_line/README.ipynb b/01.fit_a_line/README.ipynb index 8ccf9566eec289e5bc8dab6b456e53c58bdde8ce..78666779bbf83d8469f18ab5d8cba17fd664919f 100644 --- a/01.fit_a_line/README.ipynb +++ b/01.fit_a_line/README.ipynb @@ -35,7 +35,7 @@ "\n", "$\\hat{Y}$ 表示模型的预测结果,用来和真实值$Y$区分。模型要学习的参数即:$\\omega_1, \\ldots, \\omega_{13}, b$。\n", "\n", - "建立模型后,我们需要给模型一个优化目标,使得学到的参数能够让预测值$\\hat{Y}$尽可能地接近真实值$Y$。这里我们引入损失函数([Loss Function](https://en.wikipedia.org/wiki/Loss_function),或Cost Function)这个概念。 输入任意一个数据样本的目标值$y_{i}$和模型给出的预测值$\\hat{y_{i}}$,损失函数输出一个非负的实值。这个实质通常用来反映模型误差的大小。\n", + "建立模型后,我们需要给模型一个优化目标,使得学到的参数能够让预测值$\\hat{Y}$尽可能地接近真实值$Y$。这里我们引入损失函数([Loss Function](https://en.wikipedia.org/wiki/Loss_function),或Cost Function)这个概念。 输入任意一个数据样本的目标值$y_{i}$和模型给出的预测值$\\hat{y_{i}}$,损失函数输出一个非负的实值。这个实值通常用来反映模型误差的大小。\n", "\n", "对于线性回归模型来讲,最常见的损失函数就是均方误差(Mean Squared Error, [MSE](https://en.wikipedia.org/wiki/Mean_squared_error))了,它的形式是:\n", "\n", @@ -304,18 +304,41 @@ }, "source": [ "# event_handler to print training and testing info\n", + "import matplotlib.pyplot as plt\n", + "from IPython import display\n", + "import cPickle\n", + "\n", + "step=0\n", + "\n", + "train_costs=[],[]\n", + "test_costs=[],[]\n", + "\n", "def event_handler(event):\n", + " global step\n", + " global train_costs\n", + " global test_costs\n", " if isinstance(event, paddle.event.EndIteration):\n", - " if event.batch_id % 100 == 0:\n", - " print \"Pass %d, Batch %d, Cost %f\" % (\n", - " event.pass_id, event.batch_id, event.cost)\n", - "\n", - " if isinstance(event, paddle.event.EndPass):\n", - " result = trainer.test(\n", - " reader=paddle.batch(\n", - " uci_housing.test(), batch_size=2),\n", - " feeding=feeding)\n", - " print \"Test %d, Cost %f\" % (event.pass_id, result.cost)\n" + " need_plot = False\n", + " if step % 10 == 0: # every 10 batches, record a train cost\n", + " train_costs[0].append(step)\n", + " train_costs[1].append(event.cost)\n", + "\n", + " if step % 1000 == 0: # every 1000 batches, record a test cost\n", + " result = trainer.test(\n", + " reader=paddle.batch(\n", + " uci_housing.test(), batch_size=2),\n", + " feeding=feeding)\n", + " test_costs[0].append(step)\n", + " test_costs[1].append(result.cost)\n", + "\n", + " if step % 100 == 0: # every 100 batches, update cost plot\n", + " plt.plot(*train_costs)\n", + " plt.plot(*test_costs)\n", + " plt.legend(['Train Cost', 'Test Cost'], loc='upper left')\n", + " display.clear_output(wait=True)\n", + " display.display(plt.gcf())\n", + " plt.gcf().clear()\n", + " step += 1\n" ], "outputs": [ { @@ -367,6 +390,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "\n", + "![png](./image/train-and-test.png)\n", "\n", "## 总结\n", "在这章里,我们借助波士顿房价这一数据集,介绍了线性回归模型的基本概念,以及如何使用PaddlePaddle实现训练和测试的过程。很多的模型和技巧都是从简单的线性回归模型演化而来,因此弄清楚线性模型的原理和局限非常重要。\n", diff --git a/01.fit_a_line/README.md b/01.fit_a_line/README.md index 9f1ad471e49af4d2dc4c95b0127a12646dc184a6..2c3f1fd4dd61aec8ad6e97881d6229f4e6ef91b1 100644 --- a/01.fit_a_line/README.md +++ b/01.fit_a_line/README.md @@ -159,18 +159,41 @@ feeding={'x': 0, 'y': 1} ```python # event_handler to print training and testing info +import matplotlib.pyplot as plt +from IPython import display +import cPickle + +step=0 + +train_costs=[],[] +test_costs=[],[] + def event_handler(event): + global step + global train_costs + global test_costs if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f" % ( - event.pass_id, event.batch_id, event.cost) - - if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=paddle.batch( - uci_housing.test(), batch_size=2), - feeding=feeding) - print "Test %d, Cost %f" % (event.pass_id, result.cost) + need_plot = False + if step % 10 == 0: # every 10 batches, record a train cost + train_costs[0].append(step) + train_costs[1].append(event.cost) + + if step % 1000 == 0: # every 1000 batches, record a test cost + result = trainer.test( + reader=paddle.batch( + uci_housing.test(), batch_size=2), + feeding=feeding) + test_costs[0].append(step) + test_costs[1].append(result.cost) + + if step % 100 == 0: # every 100 batches, update cost plot + plt.plot(*train_costs) + plt.plot(*test_costs) + plt.legend(['Train Cost', 'Test Cost'], loc='upper left') + display.clear_output(wait=True) + display.display(plt.gcf()) + plt.gcf().clear() + step += 1 ``` ### 开始训练 @@ -186,6 +209,8 @@ trainer.train( num_passes=30) ``` +![png](./image/train-and-test.png) + ## 总结 在这章里,我们借助波士顿房价这一数据集,介绍了线性回归模型的基本概念,以及如何使用PaddlePaddle实现训练和测试的过程。很多的模型和技巧都是从简单的线性回归模型演化而来,因此弄清楚线性模型的原理和局限非常重要。 diff --git a/01.fit_a_line/image/train-and-test.png b/01.fit_a_line/image/train-and-test.png new file mode 100644 index 0000000000000000000000000000000000000000..bcd304a6a0baf30ecfbc43e08fc0aca179d05958 Binary files /dev/null and b/01.fit_a_line/image/train-and-test.png differ diff --git a/01.fit_a_line/index.en.html b/01.fit_a_line/index.en.html index 432a8692c922f28c833cee814ebe41c6ca40f8a8..06f5559174c252eaf951211d86ce86c7897d9ed9 100644 --- a/01.fit_a_line/index.en.html +++ b/01.fit_a_line/index.en.html @@ -205,19 +205,41 @@ feeding={'x': 0, 'y': 1} Moreover, an event handler is provided to print the training progress: ```python -# event_handler to print training and testing info +import matplotlib.pyplot as plt +from IPython import display +import cPickle + +step=0 + +train_costs=[],[] +test_costs=[],[] + def event_handler(event): + global step + global train_costs + global test_costs if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f" % ( - event.pass_id, event.batch_id, event.cost) - - if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=paddle.batch( - uci_housing.test(), batch_size=2), - feeding=feeding) - print "Test %d, Cost %f" % (event.pass_id, result.cost) + need_plot = False + if step % 10 == 0: # every 10 batches, record a train cost + train_costs[0].append(step) + train_costs[1].append(event.cost) + + if step % 1000 == 0: # every 1000 batches, record a test cost + result = trainer.test( + reader=paddle.batch( + uci_housing.test(), batch_size=2), + feeding=feeding) + test_costs[0].append(step) + test_costs[1].append(result.cost) + + if step % 100 == 0: # every 100 batches, update cost plot + plt.plot(*train_costs) + plt.plot(*test_costs) + plt.legend(['Train Cost', 'Test Cost'], loc='upper left') + display.clear_output(wait=True) + display.display(plt.gcf()) + plt.gcf().clear() + step += 1 ``` ### Start Training @@ -233,6 +255,8 @@ trainer.train( num_passes=30) ``` +![png](./image/train-and-test.png) + ## Summary This chapter introduces *Linear Regression* and how to train and test this model with PaddlePaddle, using the UCI Housing Data Set. Because a large number of more complex models and techniques are derived from linear regression, it is important to understand its underlying theory and limitation. diff --git a/01.fit_a_line/index.html b/01.fit_a_line/index.html index 40105f31b7e66d9c89078db78eb2e701a00e9d58..a866e13bf3321c22973198d8ae639327348c4587 100644 --- a/01.fit_a_line/index.html +++ b/01.fit_a_line/index.html @@ -201,18 +201,41 @@ feeding={'x': 0, 'y': 1} ```python # event_handler to print training and testing info +import matplotlib.pyplot as plt +from IPython import display +import cPickle + +step=0 + +train_costs=[],[] +test_costs=[],[] + def event_handler(event): + global step + global train_costs + global test_costs if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f" % ( - event.pass_id, event.batch_id, event.cost) - - if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=paddle.batch( - uci_housing.test(), batch_size=2), - feeding=feeding) - print "Test %d, Cost %f" % (event.pass_id, result.cost) + need_plot = False + if step % 10 == 0: # every 10 batches, record a train cost + train_costs[0].append(step) + train_costs[1].append(event.cost) + + if step % 1000 == 0: # every 1000 batches, record a test cost + result = trainer.test( + reader=paddle.batch( + uci_housing.test(), batch_size=2), + feeding=feeding) + test_costs[0].append(step) + test_costs[1].append(result.cost) + + if step % 100 == 0: # every 100 batches, update cost plot + plt.plot(*train_costs) + plt.plot(*test_costs) + plt.legend(['Train Cost', 'Test Cost'], loc='upper left') + display.clear_output(wait=True) + display.display(plt.gcf()) + plt.gcf().clear() + step += 1 ``` ### 开始训练 @@ -228,6 +251,8 @@ trainer.train( num_passes=30) ``` +![png](./image/train-and-test.png) + ## 总结 在这章里,我们借助波士顿房价这一数据集,介绍了线性回归模型的基本概念,以及如何使用PaddlePaddle实现训练和测试的过程。很多的模型和技巧都是从简单的线性回归模型演化而来,因此弄清楚线性模型的原理和局限非常重要。