From 68f41a4217046accd0c1da7f7c3e8c6fe72c5f67 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Wed, 3 Oct 2018 12:14:22 +0800 Subject: [PATCH] Port current book code and doc to python3 --- 01.fit_a_line/README.cn.md | 77 +++++++++++++++- 01.fit_a_line/README.md | 77 +++++++++++++++- 01.fit_a_line/image/ranges.png | Bin 6737 -> 6737 bytes 01.fit_a_line/plot.py | 83 ++++++++++++++++++ 01.fit_a_line/train.py | 4 +- 03.image_classification/README.cn.md | 2 +- 03.image_classification/README.md | 2 +- 03.image_classification/resnet.py | 2 +- 03.image_classification/train.py | 2 +- 04.word2vec/README.cn.md | 3 +- 04.word2vec/README.md | 3 +- 04.word2vec/train.py | 5 +- 06.understand_sentiment/README.cn.md | 2 +- 06.understand_sentiment/README.md | 2 +- 06.understand_sentiment/train_conv.py | 14 +-- 06.understand_sentiment/train_dyn_rnn.py | 14 +-- 06.understand_sentiment/train_stacked_lstm.py | 14 +-- 07.label_semantic_roles/README.cn.md | 5 +- 07.label_semantic_roles/README.md | 5 +- 07.label_semantic_roles/train.py | 5 +- 20 files changed, 278 insertions(+), 43 deletions(-) create mode 100644 01.fit_a_line/plot.py diff --git a/01.fit_a_line/README.cn.md b/01.fit_a_line/README.cn.md index bb6c1a4..d4b1b94 100644 --- a/01.fit_a_line/README.cn.md +++ b/01.fit_a_line/README.cn.md @@ -177,6 +177,80 @@ PaddlePaddle提供了读取数据者发生器机制来读取训练数据。读 feed_order=['x', 'y'] ``` +以及一个绘画器来进行绘制: + +```python +import six +import os + + +class PlotData(object): + def __init__(self): + self.step = [] + self.value = [] + + def append(self, step, value): + self.step.append(step) + self.value.append(value) + + def reset(self): + self.step = [] + self.value = [] + + +class Ploter(object): + def __init__(self, *args): + self.__args__ = args + self.__plot_data__ = {} + for title in args: + self.__plot_data__[title] = PlotData() + # demo in notebooks will use Ploter to plot figure, but when we convert + # the ipydb to py file for testing, the import of matplotlib will make the + # script crash. 
So we can use `export DISABLE_PLOT=True` to disable import + # these libs + self.__disable_plot__ = os.environ.get("DISABLE_PLOT") + if not self.__plot_is_disabled__(): + import matplotlib.pyplot as plt + from IPython import display + self.plt = plt + self.display = display + + def __plot_is_disabled__(self): + return self.__disable_plot__ == "True" + + def append(self, title, step, value): + assert isinstance(title, six.string_types) + assert title in self.__plot_data__ + data = self.__plot_data__[title] + assert isinstance(data, PlotData) + data.append(step, value) + + def plot(self, path=None): + if self.__plot_is_disabled__(): + return + + titles = [] + for title in self.__args__: + data = self.__plot_data__[title] + assert isinstance(data, PlotData) + if len(data.step) > 0: + titles.append(title) + self.plt.plot(data.step, data.value) + self.plt.legend(titles, loc='upper left') + if path is None: + self.display.clear_output(wait=True) + self.display.display(self.plt.gcf()) + else: + self.plt.savefig(path) + self.plt.gcf().clear() + + def reset(self): + for key in self.__plot_data__: + data = self.__plot_data__[key] + assert isinstance(data, PlotData) + data.reset() +``` + 除此之外,可以定义一个事件响应器来处理类似`打印训练进程`的事件: ```python @@ -184,7 +258,6 @@ feed_order=['x', 'y'] params_dirname = "fit_a_line.inference.model" # Plot data -from paddle.v2.plot import Ploter train_title = "Train cost" test_title = "Test cost" plot_cost = Ploter(train_title, test_title) @@ -259,7 +332,7 @@ inferencer = fluid.contrib.inferencer.Inferencer( batch_size = 10 test_reader = paddle.batch(paddle.dataset.uci_housing.test(),batch_size=batch_size) -test_data = test_reader().next() +test_data = next(test_reader()) test_x = numpy.array([data[0] for data in test_data]).astype("float32") test_y = numpy.array([data[1] for data in test_data]).astype("float32") diff --git a/01.fit_a_line/README.md b/01.fit_a_line/README.md index 5742834..dd2fc04 100644 --- a/01.fit_a_line/README.md +++ 
b/01.fit_a_line/README.md @@ -196,6 +196,80 @@ for loading the training data. A reader may return multiple columns, and we need feed_order=['x', 'y'] ``` +And a plotter to plot metrics: + +```python +import six +import os + + +class PlotData(object): + def __init__(self): + self.step = [] + self.value = [] + + def append(self, step, value): + self.step.append(step) + self.value.append(value) + + def reset(self): + self.step = [] + self.value = [] + + +class Ploter(object): + def __init__(self, *args): + self.__args__ = args + self.__plot_data__ = {} + for title in args: + self.__plot_data__[title] = PlotData() + # demo in notebooks will use Ploter to plot figure, but when we convert + # the ipydb to py file for testing, the import of matplotlib will make the + # script crash. So we can use `export DISABLE_PLOT=True` to disable import + # these libs + self.__disable_plot__ = os.environ.get("DISABLE_PLOT") + if not self.__plot_is_disabled__(): + import matplotlib.pyplot as plt + from IPython import display + self.plt = plt + self.display = display + + def __plot_is_disabled__(self): + return self.__disable_plot__ == "True" + + def append(self, title, step, value): + assert isinstance(title, six.string_types) + assert title in self.__plot_data__ + data = self.__plot_data__[title] + assert isinstance(data, PlotData) + data.append(step, value) + + def plot(self, path=None): + if self.__plot_is_disabled__(): + return + + titles = [] + for title in self.__args__: + data = self.__plot_data__[title] + assert isinstance(data, PlotData) + if len(data.step) > 0: + titles.append(title) + self.plt.plot(data.step, data.value) + self.plt.legend(titles, loc='upper left') + if path is None: + self.display.clear_output(wait=True) + self.display.display(self.plt.gcf()) + else: + self.plt.savefig(path) + self.plt.gcf().clear() + + def reset(self): + for key in self.__plot_data__: + data = self.__plot_data__[key] + assert isinstance(data, PlotData) + data.reset() +``` + Moreover, an
event handler is provided to print the training progress: ```python @@ -203,7 +277,6 @@ Moreover, an event handler is provided to print the training progress: params_dirname = "fit_a_line.inference.model" # Plot data -from paddle.v2.plot import Ploter train_title = "Train cost" test_title = "Test cost" plot_cost = Ploter(train_title, test_title) @@ -281,7 +354,7 @@ inferencer = fluid.contrib.inferencer.Inferencer( batch_size = 10 test_reader = paddle.batch(paddle.dataset.uci_housing.test(),batch_size=batch_size) -test_data = test_reader().next() +test_data = next(test_reader()) test_x = numpy.array([data[0] for data in test_data]).astype("float32") test_y = numpy.array([data[1] for data in test_data]).astype("float32") diff --git a/01.fit_a_line/image/ranges.png b/01.fit_a_line/image/ranges.png index 5325df4800985983e17476f007658d1cdb170b1c..916337f0720ef221851e89456c5c295e2e13445f 100644 GIT binary patch delta 20 bcmca;a?xZ$9;5NZd 0: + titles.append(title) + self.plt.plot(data.step, data.value) + self.plt.legend(titles, loc='upper left') + if path is None: + self.display.clear_output(wait=True) + self.display.display(self.plt.gcf()) + else: + self.plt.savefig(path) + self.plt.gcf().clear() + + def reset(self): + for key in self.__plot_data__: + data = self.__plot_data__[key] + assert isinstance(data, PlotData) + data.reset() diff --git a/01.fit_a_line/train.py b/01.fit_a_line/train.py index 8019ff3..7953eee 100644 --- a/01.fit_a_line/train.py +++ b/01.fit_a_line/train.py @@ -70,7 +70,7 @@ feed_order = ['x', 'y'] params_dirname = "fit_a_line.inference.model" # Plot data -from paddle.v2.plot import Ploter +from plot import Ploter train_title = "Train cost" test_title = "Test cost" @@ -125,7 +125,7 @@ inferencer = Inferencer( batch_size = 10 test_reader = paddle.batch( paddle.dataset.uci_housing.test(), batch_size=batch_size) -test_data = test_reader().next() +test_data = next(test_reader()) test_x = numpy.array([data[0] for data in test_data]).astype("float32") 
test_y = numpy.array([data[1] for data in test_data]).astype("float32") diff --git a/03.image_classification/README.cn.md b/03.image_classification/README.cn.md index 53129f3..bb89a28 100644 --- a/03.image_classification/README.cn.md +++ b/03.image_classification/README.cn.md @@ -282,7 +282,7 @@ def layer_warp(block_func, input, ch_in, ch_out, count, stride): def resnet_cifar10(ipt, depth=32): # depth should be one of 20, 32, 44, 56, 110, 1202 assert (depth - 2) % 6 == 0 - n = (depth - 2) / 6 + n = (depth - 2) // 6 nStages = {16, 64, 128} conv1 = conv_bn_layer(ipt, ch_out=16, filter_size=3, stride=1, padding=1) res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) diff --git a/03.image_classification/README.md b/03.image_classification/README.md index 74d2c82..82da321 100644 --- a/03.image_classification/README.md +++ b/03.image_classification/README.md @@ -282,7 +282,7 @@ Note: besides the first convolutional layer and the last fully-connected layer, def resnet_cifar10(ipt, depth=32): # depth should be one of 20, 32, 44, 56, 110, 1202 assert (depth - 2) % 6 == 0 - n = (depth - 2) / 6 + n = (depth - 2) // 6 nStages = {16, 64, 128} conv1 = conv_bn_layer(ipt, ch_out=16, filter_size=3, stride=1, padding=1) res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) diff --git a/03.image_classification/resnet.py b/03.image_classification/resnet.py index f6b039a..b7d2f62 100644 --- a/03.image_classification/resnet.py +++ b/03.image_classification/resnet.py @@ -70,7 +70,7 @@ def layer_warp(block_func, input, ch_in, ch_out, count, stride): def resnet_cifar10(ipt, depth=32): # depth should be one of 20, 32, 44, 56, 110, 1202 assert (depth - 2) % 6 == 0 - n = (depth - 2) / 6 + n = (depth - 2) // 6 nStages = {16, 64, 128} conv1 = conv_bn_layer(ipt, ch_out=16, filter_size=3, stride=1, padding=1) res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) diff --git a/03.image_classification/train.py b/03.image_classification/train.py index 52394ed..cfe4832 100644 --- 
a/03.image_classification/train.py +++ b/03.image_classification/train.py @@ -102,7 +102,7 @@ def infer(use_cuda, inference_program, params_dirname=None): inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place) - # Prepare testing data. + # Prepare testing data. from PIL import Image import numpy as np import os diff --git a/04.word2vec/README.cn.md b/04.word2vec/README.cn.md index 5741ab5..dce7424 100644 --- a/04.word2vec/README.cn.md +++ b/04.word2vec/README.cn.md @@ -208,6 +208,7 @@ import numpy from functools import partial import math import os +import six import sys from __future__ import print_function ``` @@ -394,7 +395,7 @@ def infer(use_cuda, inference_program, params_dirname=None): most_possible_word_index = numpy.argmax(result[0]) print(most_possible_word_index) print([ - key for key, value in word_dict.iteritems() + key for key, value in six.iteritems(word_dict) if value == most_possible_word_index ][0]) ``` diff --git a/04.word2vec/README.md b/04.word2vec/README.md index eeb9992..3151b73 100644 --- a/04.word2vec/README.md +++ b/04.word2vec/README.md @@ -221,6 +221,7 @@ import numpy from functools import partial import math import os +import six import sys from __future__ import print_function ``` @@ -412,7 +413,7 @@ def infer(use_cuda, inference_program, params_dirname=None): most_possible_word_index = numpy.argmax(result[0]) print(most_possible_word_index) print([ - key for key, value in word_dict.iteritems() + key for key, value in six.iteritems(word_dict) if value == most_possible_word_index ][0]) ``` diff --git a/04.word2vec/train.py b/04.word2vec/train.py index c07b622..229d018 100644 --- a/04.word2vec/train.py +++ b/04.word2vec/train.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from __future__ import print_function -import paddle.v2 as paddle +import paddle as paddle import paddle.fluid as fluid +import six import sys try: @@ -176,7 +177,7 @@ def infer(use_cuda, inference_program, params_dirname=None): most_possible_word_index = numpy.argmax(result[0]) print(most_possible_word_index) print([ - key for key, value in word_dict.iteritems() + key for key, value in six.iteritems(word_dict) if value == most_possible_word_index ][0]) diff --git a/06.understand_sentiment/README.cn.md b/06.understand_sentiment/README.cn.md index f90390a..5614abe 100644 --- a/06.understand_sentiment/README.cn.md +++ b/06.understand_sentiment/README.cn.md @@ -274,7 +274,7 @@ params_dirname = "understand_sentiment_conv.inference.model" def event_handler(event): if isinstance(event, fluid.contrib.trainer.EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( - event.step, event.epoch, map(np.array, event.metrics))) + event.step, event.epoch, list(map(np.array, event.metrics)))) if event.step == 10: trainer.save_params(params_dirname) diff --git a/06.understand_sentiment/README.md b/06.understand_sentiment/README.md index a658415..6bad749 100644 --- a/06.understand_sentiment/README.md +++ b/06.understand_sentiment/README.md @@ -281,7 +281,7 @@ params_dirname = "understand_sentiment_conv.inference.model" def event_handler(event): if isinstance(event, fluid.contrib.trainer.EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( - event.step, event.epoch, map(np.array, event.metrics))) + event.step, event.epoch, list(map(np.array, event.metrics)))) if event.step == 10: trainer.save_params(params_dirname) diff --git a/06.understand_sentiment/train_conv.py b/06.understand_sentiment/train_conv.py index b425127..eeef644 100644 --- a/06.understand_sentiment/train_conv.py +++ b/06.understand_sentiment/train_conv.py @@ -111,7 +111,7 @@ def train(use_cuda, train_program, params_dirname): event.step, avg_cost, acc)) print("Step {0}, Epoch {1} Metrics {2}".format( - 
event.step, event.epoch, map(np.array, event.metrics))) + event.step, event.epoch, list(map(np.array, event.metrics)))) elif isinstance(event, EndEpochEvent): trainer.save_params(params_dirname) @@ -133,14 +133,14 @@ def infer(use_cuda, inference_program, params_dirname=None): place=place) # Setup input by creating LoDTensor to represent sequence of words. - # Here each word is the basic element of the LoDTensor and the shape of - # each word (base_shape) should be [1] since it is simply an index to + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensor will have only - # one higher level structure (sequence of words, or sentence) than the basic - # element (word). Hence the LoDTensor will hold data for three sentences of - # length 3, 4 and 2, respectively. + # which has only one lod level. Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. # Note that lod info should be a list of lists. 
reviews_str = [ diff --git a/06.understand_sentiment/train_dyn_rnn.py b/06.understand_sentiment/train_dyn_rnn.py index bea431b..090720a 100644 --- a/06.understand_sentiment/train_dyn_rnn.py +++ b/06.understand_sentiment/train_dyn_rnn.py @@ -128,7 +128,7 @@ def train(use_cuda, train_program, params_dirname): event.step, avg_cost, acc)) print("Step {0}, Epoch {1} Metrics {2}".format( - event.step, event.epoch, map(np.array, event.metrics))) + event.step, event.epoch, list(map(np.array, event.metrics)))) elif isinstance(event, EndEpochEvent): trainer.save_params(params_dirname) @@ -150,14 +150,14 @@ def infer(use_cuda, inference_program, params_dirname=None): place=place) # Setup input by creating LoDTensor to represent sequence of words. - # Here each word is the basic element of the LoDTensor and the shape of - # each word (base_shape) should be [1] since it is simply an index to + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensor will have only - # one higher level structure (sequence of words, or sentence) than the basic - # element (word). Hence the LoDTensor will hold data for three sentences of - # length 3, 4 and 2, respectively. + # which has only one lod level. Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. # Note that lod info should be a list of lists. 
reviews_str = [ diff --git a/06.understand_sentiment/train_stacked_lstm.py b/06.understand_sentiment/train_stacked_lstm.py index 11f67fb..b665c6e 100644 --- a/06.understand_sentiment/train_stacked_lstm.py +++ b/06.understand_sentiment/train_stacked_lstm.py @@ -119,7 +119,7 @@ def train(use_cuda, train_program, params_dirname): event.step, avg_cost, acc)) print("Step {0}, Epoch {1} Metrics {2}".format( - event.step, event.epoch, map(np.array, event.metrics))) + event.step, event.epoch, list(map(np.array, event.metrics)))) elif isinstance(event, EndEpochEvent): trainer.save_params(params_dirname) @@ -141,14 +141,14 @@ def infer(use_cuda, inference_program, params_dirname=None): place=place) # Setup input by creating LoDTensor to represent sequence of words. - # Here each word is the basic element of the LoDTensor and the shape of - # each word (base_shape) should be [1] since it is simply an index to + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensor will have only - # one higher level structure (sequence of words, or sentence) than the basic - # element (word). Hence the LoDTensor will hold data for three sentences of - # length 3, 4 and 2, respectively. + # which has only one lod level. Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. # Note that lod info should be a list of lists. 
reviews_str = [ diff --git a/07.label_semantic_roles/README.cn.md b/07.label_semantic_roles/README.cn.md index 0891f5b..34baef5 100644 --- a/07.label_semantic_roles/README.cn.md +++ b/07.label_semantic_roles/README.cn.md @@ -184,8 +184,9 @@ from __future__ import print_function import math, os import numpy as np import paddle -import paddle.v2.dataset.conll05 as conll05 +import paddle.dataset.conll05 as conll05 import paddle.fluid as fluid +import six import time with_gpu = os.getenv('WITH_GPU', '0') != '0' @@ -417,7 +418,7 @@ def train(use_cuda, save_dirname=None, is_local=True): start_time = time.time() batch_id = 0 - for pass_id in xrange(PASS_NUM): + for pass_id in six.moves.xrange(PASS_NUM): for data in train_data(): cost = exe.run(main_program, feed=feeder.feed(data), diff --git a/07.label_semantic_roles/README.md b/07.label_semantic_roles/README.md index 623d035..0a6ce1c 100644 --- a/07.label_semantic_roles/README.md +++ b/07.label_semantic_roles/README.md @@ -207,8 +207,9 @@ from __future__ import print_function import math, os import numpy as np import paddle -import paddle.v2.dataset.conll05 as conll05 +import paddle.dataset.conll05 as conll05 import paddle.fluid as fluid +import six import time with_gpu = os.getenv('WITH_GPU', '0') != '0' @@ -427,7 +428,7 @@ def train(use_cuda, save_dirname=None, is_local=True): start_time = time.time() batch_id = 0 - for pass_id in xrange(PASS_NUM): + for pass_id in six.moves.xrange(PASS_NUM): for data in train_data(): cost = exe.run(main_program, feed=feeder.feed(data), diff --git a/07.label_semantic_roles/train.py b/07.label_semantic_roles/train.py index 780167a..87c7f39 100644 --- a/07.label_semantic_roles/train.py +++ b/07.label_semantic_roles/train.py @@ -3,8 +3,9 @@ from __future__ import print_function import math, os import numpy as np import paddle -import paddle.v2.dataset.conll05 as conll05 +import paddle.dataset.conll05 as conll05 import paddle.fluid as fluid +import six import time with_gpu = 
os.getenv('WITH_GPU', '0') != '0' @@ -167,7 +168,7 @@ def train(use_cuda, save_dirname=None, is_local=True): start_time = time.time() batch_id = 0 - for pass_id in xrange(PASS_NUM): + for pass_id in six.moves.xrange(PASS_NUM): for data in train_data(): cost = exe.run( main_program, feed=feeder.feed(data), fetch_list=[avg_cost]) -- GitLab