diff --git a/06.understand_sentiment/README.cn.md b/06.understand_sentiment/README.cn.md
index c93fab46de786422e891310f22ec96062ae737ec..5a60edd6695569c091fb5c06db25cd7fa8423b08 100644
--- a/06.understand_sentiment/README.cn.md
+++ b/06.understand_sentiment/README.cn.md
@@ -110,24 +110,16 @@ Paddle implements automatic download and reading of the IMDB dataset in `dataset/imdb.py`
 from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
-from functools import partial
 import numpy as np
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
+import sys
+import math

 CLASS_DIM = 2
 EMB_DIM = 128
 HID_DIM = 512
 STACKED_NUM = 3
 BATCH_SIZE = 128
-USE_GPU = False
+
 ```
@@ -212,8 +204,7 @@ def inference_program(word_dict):

 During testing, the classifier computes the probability of each output. By convention, the first returned value is the cost.

 ```python
-def train_program(word_dict):
-    prediction = inference_program(word_dict)
+def train_program(prediction):
     label = fluid.layers.data(name="label", shape=[1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
@@ -258,59 +249,77 @@ train_reader = paddle.batch(

 The trainer needs a training program and an optimizer function. Note that `test_program` is cloned from the main program *before* `minimize` is called, so the test program contains no backward or optimizer ops.

 ```python
-trainer = Trainer(
-    train_func=partial(train_program, word_dict),
-    place=place,
-    optimizer_func=optimizer_func)
+exe = fluid.Executor(place)
+prediction = inference_program(word_dict)
+[avg_cost, accuracy] = train_program(prediction)
+test_program = fluid.default_main_program().clone(for_test=True)
+sgd_optimizer = optimizer_func()
+sgd_optimizer.minimize(avg_cost)
 ```

-### Providing the data
+### Providing the data and building the main training loop

 `feed_order` defines the mapping between the fields of each generated data record and the corresponding `paddle.layer.data`. For example, the first column produced by `imdb.train` maps to the `words` feature.

-```python
-feed_order = ['words', 'label']
-```
-
-### Event handler
-
-The callback function event_handler is invoked after a predefined event occurs. For example, we can inspect the loss at the end of every training step.
-
 ```python
 # Specify the directory path to save the parameters
 params_dirname = "understand_sentiment_conv.inference.model"

-def event_handler(event):
-    if isinstance(event, EndStepEvent):
-        print("Step {0}, Epoch {1} Metrics {2}".format(
-            event.step, event.epoch, list(map(np.array, event.metrics))))
-
-        if event.step == 10:
-            trainer.save_params(params_dirname)
-            trainer.stop()
+feed_order = ['words', 'label']
+pass_num = 1
+
+def train_loop(main_program):
+    exe.run(fluid.default_startup_program())
+    feed_var_list_loop = [
+        main_program.global_block().var(var_name) for var_name in feed_order
+    ]
+    feeder = fluid.DataFeeder(
+        feed_list=feed_var_list_loop, place=place)
+
+    for epoch_id in range(pass_num):
+        for step_id, data in enumerate(train_reader()):
+            metrics = exe.run(main_program,
+                              feed=feeder.feed(data),
+                              fetch_list=[avg_cost, accuracy])
+
+            avg_cost_test, acc_test = train_test(test_program, test_reader)
+            print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
+                step_id, avg_cost_test, acc_test))
+
+            print("Step {0}, Epoch {1} Metrics {2}".format(
+                step_id, epoch_id, list(map(np.array,
+                                            metrics))))
+
+            if step_id == 30:
+                if params_dirname is not None:
+                    fluid.io.save_inference_model(params_dirname, ["words"],
+                                                  prediction, exe)
+                return
 ```

+### Monitoring the training process
+
+The main training loop prints the metrics of every step, so we can watch how training progresses. It relies on a `train_test` helper and a `test_reader`; a sketch is provided after the next code block.
+
 ### Start training

-Finally, we pass in the number of training epochs (num_epoch) and some other parameters, and call trainer.train to start training.
+Finally, we launch the main training loop to start training. Training takes a while; for faster turnaround you can lower the number of training steps (or relax the loss target), trading some accuracy for a shorter run.

 ```python
-trainer.train(
-    num_epochs=1,
-    event_handler=event_handler,
-    reader=train_reader,
-    feed_order=feed_order)
+train_loop(fluid.default_main_program())
 ```
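+Note: `train_loop` above calls a `train_test` helper that this document does not define, although `train_conv.py` in this repository does. A sketch consistent with that file is given below; define it (and `test_reader`, assumed here to be built like `train_reader`) before invoking `train_loop`:
+
+```python
+# Sketch mirroring the train_test helper defined in train_conv.py.
+# test_reader is an assumption, modeled on train_reader above.
+test_reader = paddle.batch(
+    paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
+
+def train_test(program, reader):
+    count = 0
+    feed_var_list = [
+        program.global_block().var(var_name) for var_name in feed_order
+    ]
+    feeder_test = fluid.DataFeeder(feed_list=feed_var_list, place=place)
+    test_exe = fluid.Executor(place)
+    accumulated = 2 * [0]  # running sums for avg_cost and accuracy
+    for test_data in reader():
+        outs = test_exe.run(program=program,
+                            feed=feeder_test.feed(test_data),
+                            fetch_list=[avg_cost, accuracy])
+        accumulated = [x[0] + x[1][0] for x in zip(accumulated, outs)]
+        count += 1
+    return [x / count for x in accumulated]  # mean test cost and accuracy
+```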

 ## Applying the model

 ### Building the predictor

-Pass `inference_program` and `params_dirname` to initialize a predictor; `params_dirname` holds the parameters saved during training.
+As in training, we need to build an inference process and use the trained model and parameters to make predictions; `params_dirname` is the directory where the training parameters were saved.

 ```python
-inferencer = Inferencer(
-    infer_func=partial(inference_program, word_dict), param_path=params_dirname, place=place)
+place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+exe = fluid.Executor(place)
+inference_scope = fluid.core.Scope()
 ```

 ### Generating test input data
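+As a quick check of the shapes this section constructs: for the three example reviews used below, `base_shape` evaluates to `[[6, 5, 4]]`, one LoD level holding the word count of each review (a standalone sketch, independent of the trained model):
+
+```python
+# Sketch: the length-based shape create_lod_tensor receives for the reviews.
+reviews_str = [
+    'read the book forget the movie', 'this is a great movie',
+    'this is very bad'
+]
+reviews = [c.split() for c in reviews_str]
+base_shape = [[len(c) for c in reviews]]
+print(base_shape)  # [[6, 5, 4]] -- one LoD level, three word sequences
+```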
@@ -334,15 +343,25 @@ base_shape = [[len(c) for c in lod]]

 tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
 ```

-## Applying the model
+## Applying the model and making predictions

 Now we can predict whether each review is positive or negative.

 ```python
-results = inferencer.infer({'words': tensor_words})
-
-for i, r in enumerate(results[0]):
-    print("Predict probability of ", r[0], " to be positive and ", r[1], " to be negative for review \'", reviews_str[i], "\'")
+with fluid.scope_guard(inference_scope):
+
+    [inferencer, feed_target_names,
+     fetch_targets] = fluid.io.load_inference_model(params_dirname, exe)
+
+    assert feed_target_names[0] == "words"
+    results = exe.run(inferencer,
+                      feed={feed_target_names[0]: tensor_words},
+                      fetch_list=fetch_targets,
+                      return_numpy=False)
+    np_data = np.array(results[0])
+    for i, r in enumerate(np_data):
+        print("Predict probability of ", r[0], " to be positive and ", r[1],
+              " to be negative for review \'", reviews_str[i], "\'")
 ```
diff --git a/06.understand_sentiment/index.cn.html b/06.understand_sentiment/index.cn.html
index 68e37937b893e38e924ae4584b9749de3e2ebcce..b9de3f8b74218e9870aa7889a81624c0453a2b06 100644
--- a/06.understand_sentiment/index.cn.html
+++ b/06.understand_sentiment/index.cn.html
@@ -152,24 +152,16 @@ Paddle implements automatic download and reading of the IMDB dataset in `dataset/imdb.py`
 from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
-from functools import partial
 import numpy as np
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
+import sys
+import math

 CLASS_DIM = 2
 EMB_DIM = 128
 HID_DIM = 512
 STACKED_NUM = 3
 BATCH_SIZE = 128
-USE_GPU = False
+
 ```
@@ -254,8 +246,7 @@ def inference_program(word_dict):

 During testing, the classifier computes the probability of each output. By convention, the first returned value is the cost.

 ```python
-def train_program(word_dict):
-    prediction = inference_program(word_dict)
+def train_program(prediction):
     label = fluid.layers.data(name="label", shape=[1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
@@ -300,59 +291,77 @@ train_reader = paddle.batch(

 The trainer needs a training program and an optimizer function. Note that `test_program` is cloned from the main program *before* `minimize` is called, so the test program contains no backward or optimizer ops.

 ```python
-trainer = Trainer(
-    train_func=partial(train_program, word_dict),
-    place=place,
-    optimizer_func=optimizer_func)
+exe = fluid.Executor(place)
+prediction = inference_program(word_dict)
+[avg_cost, accuracy] = train_program(prediction)
+test_program = fluid.default_main_program().clone(for_test=True)
+sgd_optimizer = optimizer_func()
+sgd_optimizer.minimize(avg_cost)
 ```

-### Providing the data
+### Providing the data and building the main training loop

 `feed_order` defines the mapping between the fields of each generated data record and the corresponding `paddle.layer.data`. For example, the first column produced by `imdb.train` maps to the `words` feature.

-```python
-feed_order = ['words', 'label']
-```
-
-### Event handler
-
-The callback function event_handler is invoked after a predefined event occurs. For example, we can inspect the loss at the end of every training step.
-
 ```python
 # Specify the directory path to save the parameters
 params_dirname = "understand_sentiment_conv.inference.model"

-def event_handler(event):
-    if isinstance(event, EndStepEvent):
-        print("Step {0}, Epoch {1} Metrics {2}".format(
-            event.step, event.epoch, list(map(np.array, event.metrics))))
-
-        if event.step == 10:
-            trainer.save_params(params_dirname)
-            trainer.stop()
+feed_order = ['words', 'label']
+pass_num = 1
+
+def train_loop(main_program):
+    exe.run(fluid.default_startup_program())
+    feed_var_list_loop = [
+        main_program.global_block().var(var_name) for var_name in feed_order
+    ]
+    feeder = fluid.DataFeeder(
+        feed_list=feed_var_list_loop, place=place)
+
+    for epoch_id in range(pass_num):
+        for step_id, data in enumerate(train_reader()):
+            metrics = exe.run(main_program,
+                              feed=feeder.feed(data),
+                              fetch_list=[avg_cost, accuracy])
+
+            avg_cost_test, acc_test = train_test(test_program, test_reader)
+            print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
+                step_id, avg_cost_test, acc_test))
+
+            print("Step {0}, Epoch {1} Metrics {2}".format(
+                step_id, epoch_id, list(map(np.array,
+                                            metrics))))
+
+            if step_id == 30:
+                if params_dirname is not None:
+                    fluid.io.save_inference_model(params_dirname, ["words"],
+                                                  prediction, exe)
+                return
 ```

+### Monitoring the training process
+
+The main training loop prints the metrics of every step, so we can watch how training progresses. It relies on a `train_test` helper and a `test_reader` as defined in `train_conv.py`.
+
 ### Start training

-Finally, we pass in the number of training epochs (num_epoch) and some other parameters, and call trainer.train to start training.
+Finally, we launch the main training loop to start training. Training takes a while; for faster turnaround you can lower the number of training steps (or relax the loss target), trading some accuracy for a shorter run.

 ```python
-trainer.train(
-    num_epochs=1,
-    event_handler=event_handler,
-    reader=train_reader,
-    feed_order=feed_order)
+train_loop(fluid.default_main_program())
 ```

 ## Applying the model

 ### Building the predictor

-Pass `inference_program` and `params_dirname` to initialize a predictor; `params_dirname` holds the parameters saved during training.
+As in training, we need to build an inference process and use the trained model and parameters to make predictions; `params_dirname` is the directory where the training parameters were saved.

 ```python
-inferencer = Inferencer(
-    infer_func=partial(inference_program, word_dict), param_path=params_dirname, place=place)
+place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+exe = fluid.Executor(place)
+inference_scope = fluid.core.Scope()
 ```

 ### Generating test input data
@@ -376,15 +385,25 @@ base_shape = [[len(c) for c in lod]]

 tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
 ```

-## Applying the model
+## Applying the model and making predictions

 Now we can predict whether each review is positive or negative.

 ```python
-results = inferencer.infer({'words': tensor_words})
-
-for i, r in enumerate(results[0]):
-    print("Predict probability of ", r[0], " to be positive and ", r[1], " to be negative for review \'", reviews_str[i], "\'")
+with fluid.scope_guard(inference_scope):
+
+    [inferencer, feed_target_names,
+     fetch_targets] = fluid.io.load_inference_model(params_dirname, exe)
+
+    assert feed_target_names[0] == "words"
+    results = exe.run(inferencer,
+                      feed={feed_target_names[0]: tensor_words},
+                      fetch_list=fetch_targets,
+                      return_numpy=False)
+    np_data = np.array(results[0])
+    for i, r in enumerate(np_data):
+        print("Predict probability of ", r[0], " to be positive and ", r[1],
+              " to be negative for review \'", reviews_str[i], "\'")
 ```
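The three training scripts below share one Fluid 1.x skeleton: build the forward program, clone a test program, attach the optimizer, then drive everything through an `Executor`. A minimal self-contained sketch of that workflow on a toy regression (all names here are illustrative, not taken from the scripts):

```python
# Sketch of the Executor workflow that replaces the removed Trainer class.
import numpy as np
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[1], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

# Clone for evaluation BEFORE adding backward/optimizer ops.
test_program = fluid.default_main_program().clone(for_test=True)
fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())  # initialize parameters

for step in range(10):
    data = np.random.rand(8, 1).astype('float32')
    out, = exe.run(fluid.default_main_program(),
                   feed={'x': data, 'y': 2 * data},
                   fetch_list=[loss])  # one SGD step per run
```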
diff --git a/06.understand_sentiment/train_conv.py b/06.understand_sentiment/train_conv.py
index f9435cc8d8343bf85a1472ccbeb1e21f79a6f7d2..7c203feb805e262cc5d027a9720ce555bd9e94db 100644
--- a/06.understand_sentiment/train_conv.py
+++ b/06.understand_sentiment/train_conv.py
@@ -14,22 +14,11 @@

 from __future__ import print_function

-import os
 import paddle
 import paddle.fluid as fluid
-from functools import partial
 import numpy as np
 import sys
-
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
+import math

 CLASS_DIM = 2
 EMB_DIM = 128
@@ -66,8 +55,7 @@ def inference_program(word_dict):
     return net


-def train_program(word_dict):
-    prediction = inference_program(word_dict)
+def train_program(prediction):
     label = fluid.layers.data(name="label", shape=[1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
@@ -79,8 +67,9 @@ def optimizer_func():
     return fluid.optimizer.Adagrad(learning_rate=0.002)


-def train(use_cuda, train_program, params_dirname):
+def train(use_cuda, params_dirname):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+
     print("Loading IMDB word dict....")
     word_dict = paddle.dataset.imdb.word_dict()

@@ -94,83 +83,131 @@ def train(use_cuda, train_program, params_dirname):
     test_reader = paddle.batch(
         paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)

-    trainer = Trainer(
-        train_func=partial(train_program, word_dict),
-        place=place,
-        optimizer_func=optimizer_func)
-
     feed_order = ['words', 'label']
+    pass_num = 1

-    def event_handler(event):
-        if isinstance(event, EndStepEvent):
-            if event.step % 10 == 0:
-                avg_cost, acc = trainer.test(
-                    reader=test_reader, feed_order=feed_order)
-
-                print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
-                    event.step, avg_cost, acc))
-
-                print("Step {0}, Epoch {1} Metrics {2}".format(
-                    event.step, event.epoch, list(map(np.array,
-                                                      event.metrics))))
-
-        elif isinstance(event, EndEpochEvent):
-            trainer.save_params(params_dirname)
-
-    trainer.train(
-        num_epochs=1,
-        event_handler=event_handler,
-        reader=train_reader,
-        feed_order=feed_order)
-
-
-def infer(use_cuda, inference_program, params_dirname=None):
+    main_program = fluid.default_main_program()
+    startup_program = fluid.default_startup_program()
+    prediction = inference_program(word_dict)
+    train_func_outputs = train_program(prediction)
+    avg_cost = train_func_outputs[0]
+
+    test_program = main_program.clone(for_test=True)
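+    # The clone above happens *before* minimize() appends backward and
+    # optimizer ops: Program.clone(for_test=True) does not prune existing
+    # operators, so cloning later would make test runs update parameters.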
+
+    sgd_optimizer = optimizer_func()
+    sgd_optimizer.minimize(avg_cost)
+    exe = fluid.Executor(place)
+
+    def train_test(program, reader):
+        count = 0
+        feed_var_list = [
+            program.global_block().var(var_name) for var_name in feed_order
+        ]
+        feeder_test = fluid.DataFeeder(feed_list=feed_var_list, place=place)
+        test_exe = fluid.Executor(place)
+        accumulated = len(train_func_outputs) * [0]
+        for test_data in reader():
+            avg_cost_np = test_exe.run(
+                program=program,
+                feed=feeder_test.feed(test_data),
+                fetch_list=train_func_outputs)
+            accumulated = [
+                x[0] + x[1][0] for x in zip(accumulated, avg_cost_np)
+            ]
+            count += 1
+        return [x / count for x in accumulated]
+
+    def train_loop():
+
+        feed_var_list_loop = [
+            main_program.global_block().var(var_name) for var_name in feed_order
+        ]
+        feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place)
+        exe.run(startup_program)
+
+        for epoch_id in range(pass_num):
+            for step_id, data in enumerate(train_reader()):
+                metrics = exe.run(
+                    main_program,
+                    feed=feeder.feed(data),
+                    fetch_list=[var.name for var in train_func_outputs])
+                print("step: {0}, Metrics {1}".format(
+                    step_id, list(map(np.array, metrics))))
+                if (step_id + 1) % 10 == 0:
+                    avg_cost_test, acc_test = train_test(test_program,
+                                                         test_reader)
+                    print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
+                        step_id, avg_cost_test, acc_test))
+
+                    print("Step {0}, Epoch {1} Metrics {2}".format(
+                        step_id, epoch_id, list(map(np.array, metrics))))
+                if math.isnan(float(metrics[0])):
+                    sys.exit("got NaN loss, training failed.")
+        if params_dirname is not None:
+            fluid.io.save_inference_model(params_dirname, ["words"],
+                                          prediction, exe)
+
+    train_loop()
+
+
+def infer(use_cuda, params_dirname=None):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     word_dict = paddle.dataset.imdb.word_dict()

-    inferencer = Inferencer(
-        infer_func=partial(inference_program, word_dict),
-        param_path=params_dirname,
-        place=place)
-
-    # Setup input by creating LoDTensor to represent sequence of words.
-    # Here each word is the basic element of the LoDTensor and the shape of
-    # each word (base_shape) should be [1] since it is simply an index to
-    # look up for the corresponding word vector.
-    # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]],
-    # which has only one lod level. Then the created LoDTensor will have only
-    # one higher level structure (sequence of words, or sentence) than the basic
-    # element (word). Hence the LoDTensor will hold data for three sentences of
-    # length 3, 4 and 2, respectively.
-    # Note that lod info should be a list of lists.
-
-    reviews_str = [
-        'read the book forget the movie', 'this is a great movie',
-        'this is very bad'
-    ]
-    reviews = [c.split() for c in reviews_str]
-
-    UNK = word_dict['<unk>']
-    lod = []
-    for c in reviews:
-        lod.append([word_dict.get(words, UNK) for words in c])
-
-    base_shape = [[len(c) for c in lod]]
-
-    tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
-    results = inferencer.infer({'words': tensor_words})
-
-    for i, r in enumerate(results[0]):
-        print("Predict probability of ", r[0], " to be positive and ", r[1],
-              " to be negative for review \'", reviews_str[i], "\'")
+    exe = fluid.Executor(place)
+
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be fed
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inferencer, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(params_dirname, exe)
+
+        # Setup input by creating LoDTensor to represent sequence of words.
+        # Here each word is the basic element of the LoDTensor and the shape of
+        # each word (base_shape) should be [1] since it is simply an index to
+        # look up for the corresponding word vector.
+        # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]],
+        # which has only one lod level. Then the created LoDTensor will have only
+        # one higher level structure (sequence of words, or sentence) than the basic
+        # element (word). Hence the LoDTensor will hold data for three sentences of
+        # length 3, 4 and 2, respectively.
+        # Note that lod info should be a list of lists.
+        reviews_str = [
+            'read the book forget the movie', 'this is a great movie',
+            'this is very bad'
+        ]
+        reviews = [c.split() for c in reviews_str]
+
+        UNK = word_dict['<unk>']
+        lod = []
+        for c in reviews:
+            lod.append([word_dict.get(words, UNK) for words in c])
+
+        base_shape = [[len(c) for c in lod]]
+
+        tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
+        assert feed_target_names[0] == "words"
+        results = exe.run(
+            inferencer,
+            feed={feed_target_names[0]: tensor_words},
+            fetch_list=fetch_targets,
+            return_numpy=False)
+        np_data = np.array(results[0])
+        for i, r in enumerate(np_data):
+            print("Predict probability of ", r[0], " to be positive and ", r[1],
+                  " to be negative for review \'", reviews_str[i], "\'")


 def main(use_cuda):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
     params_dirname = "understand_sentiment_conv.inference.model"
-    train(use_cuda, train_program, params_dirname)
-    infer(use_cuda, inference_program, params_dirname)
+    train(use_cuda, params_dirname)
+    infer(use_cuda, params_dirname)


 if __name__ == '__main__':
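A small caveat on `train_test` as defined above: it averages the per-batch means with equal weight, so a smaller final batch is slightly over-weighted. A hypothetical exact variant (not part of this change) would weight each batch mean by its batch size:

```python
# Hypothetical weighted variant of train_test's accumulation (sketch).
def weighted_mean(batch_means, batch_sizes):
    total = sum(batch_sizes)
    return sum(m * n for m, n in zip(batch_means, batch_sizes)) / total

# Example: a full batch of 128 and a final batch of 32 (sizes illustrative).
print(weighted_mean([0.50, 0.40], [128, 32]))  # 0.48, not the plain 0.45
```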
diff --git a/06.understand_sentiment/train_dyn_rnn.py b/06.understand_sentiment/train_dyn_rnn.py
index 5f6f79d5f0f6cfacdf84cd9c56d49e78044abe9e..368f4e626d95c0cca47fa005d131598edb1e5fb0 100644
--- a/06.understand_sentiment/train_dyn_rnn.py
+++ b/06.understand_sentiment/train_dyn_rnn.py
@@ -14,28 +14,16 @@

 from __future__ import print_function

-import os
 import paddle
 import paddle.fluid as fluid
-from functools import partial
 import numpy as np
 import sys
-
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
+import math

 CLASS_DIM = 2
 EMB_DIM = 128
 BATCH_SIZE = 128
 LSTM_SIZE = 128
-USE_GPU = False


 def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size):
@@ -83,8 +71,7 @@ def inference_program(word_dict):
     return pred


-def train_program(word_dict):
-    prediction = inference_program(word_dict)
+def train_program(prediction):
     label = fluid.layers.data(name="label", shape=[1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
@@ -96,7 +83,7 @@ def optimizer_func():
     return fluid.optimizer.Adagrad(learning_rate=0.002)


-def train(use_cuda, train_program, params_dirname):
+def train(use_cuda, params_dirname):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     print("Loading IMDB word dict....")
     word_dict = paddle.dataset.imdb.word_dict()
@@ -111,83 +98,128 @@ def train(use_cuda, train_program, params_dirname):
     test_reader = paddle.batch(
         paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)

-    trainer = Trainer(
-        train_func=partial(train_program, word_dict),
-        place=place,
-        optimizer_func=optimizer_func)
-
     feed_order = ['words', 'label']
+    pass_num = 1

-    def event_handler(event):
-        if isinstance(event, EndStepEvent):
-            if event.step % 10 == 0:
-                avg_cost, acc = trainer.test(
-                    reader=test_reader, feed_order=feed_order)
-
-                print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
-                    event.step, avg_cost, acc))
-
-                print("Step {0}, Epoch {1} Metrics {2}".format(
-                    event.step, event.epoch, list(map(np.array,
-                                                      event.metrics))))
-
-        elif isinstance(event, EndEpochEvent):
-            trainer.save_params(params_dirname)
-
-    trainer.train(
-        num_epochs=1,
-        event_handler=event_handler,
-        reader=train_reader,
-        feed_order=feed_order)
-
-
-def infer(use_cuda, inference_program, params_dirname=None):
+    main_program = fluid.default_main_program()
+    startup_program = fluid.default_startup_program()
+    prediction = inference_program(word_dict)
+    train_func_outputs = train_program(prediction)
+    avg_cost = train_func_outputs[0]
+
+    test_program = main_program.clone(for_test=True)
+
+    sgd_optimizer = optimizer_func()
+    sgd_optimizer.minimize(avg_cost)
+    exe = fluid.Executor(place)
+
+    def train_test(program, reader):
+        count = 0
+        feed_var_list = [
+            program.global_block().var(var_name) for var_name in feed_order
+        ]
+        feeder_test = fluid.DataFeeder(feed_list=feed_var_list, place=place)
+        test_exe = fluid.Executor(place)
+        accumulated = len(train_func_outputs) * [0]
+        for test_data in reader():
+            avg_cost_np = test_exe.run(
+                program=program,
+                feed=feeder_test.feed(test_data),
+                fetch_list=train_func_outputs)
+            accumulated = [
+                x[0] + x[1][0] for x in zip(accumulated, avg_cost_np)
+            ]
+            count += 1
+        return [x / count for x in accumulated]
+
+    def train_loop():
+
+        feed_var_list_loop = [
+            main_program.global_block().var(var_name) for var_name in feed_order
+        ]
+        feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place)
+        exe.run(startup_program)
+
+        for epoch_id in range(pass_num):
+            for step_id, data in enumerate(train_reader()):
+                metrics = exe.run(
+                    main_program,
+                    feed=feeder.feed(data),
+                    fetch_list=[var.name for var in train_func_outputs])
+                if (step_id + 1) % 10 == 0:
+
+                    #avg_cost_test, acc_test = train_test(test_program, test_reader)
+                    #print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
+                    #    step_id, avg_cost_test, acc_test))
+
+                    print("Step {0}, Epoch {1} Metrics {2}".format(
+                        step_id, epoch_id, list(map(np.array, metrics))))
+                if math.isnan(float(metrics[0])):
+                    sys.exit("got NaN loss, training failed.")
+        if params_dirname is not None:
+            fluid.io.save_inference_model(params_dirname, ["words"],
+                                          prediction, exe)
+
+    train_loop()
+
+
+def infer(use_cuda, params_dirname=None):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     word_dict = paddle.dataset.imdb.word_dict()

-    inferencer = Inferencer(
-        infer_func=partial(inference_program, word_dict),
-        param_path=params_dirname,
-        place=place)
-
-    # Setup input by creating LoDTensor to represent sequence of words.
-    # Here each word is the basic element of the LoDTensor and the shape of
-    # each word (base_shape) should be [1] since it is simply an index to
-    # look up for the corresponding word vector.
-    # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]],
-    # which has only one lod level. Then the created LoDTensor will have only
-    # one higher level structure (sequence of words, or sentence) than the basic
-    # element (word). Hence the LoDTensor will hold data for three sentences of
-    # length 3, 4 and 2, respectively.
-    # Note that lod info should be a list of lists.
-
-    reviews_str = [
-        'read the book forget the movie', 'this is a great movie',
-        'this is very bad'
-    ]
-    reviews = [c.split() for c in reviews_str]
-
-    UNK = word_dict['<unk>']
-    lod = []
-    for c in reviews:
-        lod.append([word_dict.get(words, UNK) for words in c])
-
-    base_shape = [[len(c) for c in lod]]
-
-    tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
-    results = inferencer.infer({'words': tensor_words})
-
-    for i, r in enumerate(results[0]):
-        print("Predict probability of ", r[0], " to be positive and ", r[1],
-              " to be negative for review \'", reviews_str[i], "\'")
+    exe = fluid.Executor(place)
+
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be fed
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inferencer, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(params_dirname, exe)
+
+        # Setup input by creating LoDTensor to represent sequence of words.
+        # Here each word is the basic element of the LoDTensor and the shape of
+        # each word (base_shape) should be [1] since it is simply an index to
+        # look up for the corresponding word vector.
+        # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]],
+        # which has only one lod level. Then the created LoDTensor will have only
+        # one higher level structure (sequence of words, or sentence) than the basic
+        # element (word). Hence the LoDTensor will hold data for three sentences of
+        # length 3, 4 and 2, respectively.
+        # Note that lod info should be a list of lists.
+        reviews_str = [
+            'read the book forget the movie', 'this is a great movie',
+            'this is very bad'
+        ]
+        reviews = [c.split() for c in reviews_str]
+
+        UNK = word_dict['<unk>']
+        lod = []
+        for c in reviews:
+            lod.append([word_dict.get(words, UNK) for words in c])
+
+        base_shape = [[len(c) for c in lod]]
+
+        tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
+        assert feed_target_names[0] == "words"
+        results = exe.run(
+            inferencer,
+            feed={feed_target_names[0]: tensor_words},
+            fetch_list=fetch_targets,
+            return_numpy=False)
+        np_data = np.array(results[0])
+        for i, r in enumerate(np_data):
+            print("Predict probability of ", r[0], " to be positive and ", r[1],
+                  " to be negative for review \'", reviews_str[i], "\'")


 def main(use_cuda):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
     params_dirname = "understand_sentiment_conv.inference.model"
-    train(use_cuda, train_program, params_dirname)
-    infer(use_cuda, inference_program, params_dirname)
+    train(use_cuda, params_dirname)
+    infer(use_cuda, params_dirname)


 if __name__ == '__main__':
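For reference, `feeder.feed(data)` in these loops converts a batch of `(words, label)` pairs into the dict of LoD tensors that `Executor.run` expects, keyed by the names in `feed_order`. A standalone sketch with toy dimensions, assuming the documented `DataFeeder` behavior:

```python
# Sketch: what DataFeeder.feed produces for one mini-batch (toy data).
import paddle.fluid as fluid

words = fluid.layers.data(name='words', shape=[1], dtype='int64', lod_level=1)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[words, label], place=fluid.CPUPlace())

# Two "reviews" of different lengths: integer word ids plus a 0/1 label.
batch = [([1, 2, 3], 0), ([4, 5], 1)]
feed_dict = feeder.feed(batch)
print(sorted(feed_dict.keys()))  # ['label', 'words'] -- LoDTensors by name
```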
diff --git a/06.understand_sentiment/train_stacked_lstm.py b/06.understand_sentiment/train_stacked_lstm.py
index 8304979d7b7d29f620dc85a7d209792a065f84b9..66fcdf0933bfb8319e9406a05cfc30c36462d0b7 100644
--- a/06.understand_sentiment/train_stacked_lstm.py
+++ b/06.understand_sentiment/train_stacked_lstm.py
@@ -17,19 +17,9 @@ from __future__ import print_function
 import os
 import paddle
 import paddle.fluid as fluid
-from functools import partial
 import numpy as np
 import sys
-
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
+import math

 CLASS_DIM = 2
 EMB_DIM = 128
@@ -74,8 +64,8 @@ def inference_program(word_dict):
     return net


-def train_program(word_dict):
-    prediction = inference_program(word_dict)
+def train_program(prediction):
     label = fluid.layers.data(name="label", shape=[1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
@@ -87,8 +77,9 @@ def optimizer_func():
     return fluid.optimizer.Adagrad(learning_rate=0.002)


-def train(use_cuda, train_program, params_dirname):
+def train(use_cuda, params_dirname):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+
     print("Loading IMDB word dict....")
     word_dict = paddle.dataset.imdb.word_dict()

@@ -102,83 +93,131 @@ def train(use_cuda, train_program, params_dirname):
     test_reader = paddle.batch(
         paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)

-    trainer = Trainer(
-        train_func=partial(train_program, word_dict),
-        place=place,
-        optimizer_func=optimizer_func)
-
     feed_order = ['words', 'label']
+    pass_num = 1

-    def event_handler(event):
-        if isinstance(event, EndStepEvent):
-            if event.step % 10 == 0:
-                avg_cost, acc = trainer.test(
-                    reader=test_reader, feed_order=feed_order)
-
-                print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
-                    event.step, avg_cost, acc))
-
-                print("Step {0}, Epoch {1} Metrics {2}".format(
-                    event.step, event.epoch, list(map(np.array,
-                                                      event.metrics))))
-
-        elif isinstance(event, EndEpochEvent):
-            trainer.save_params(params_dirname)
-
-    trainer.train(
-        num_epochs=1,
-        event_handler=event_handler,
-        reader=train_reader,
-        feed_order=feed_order)
-
-
-def infer(use_cuda, inference_program, params_dirname=None):
+    main_program = fluid.default_main_program()
+    startup_program = fluid.default_startup_program()
+    prediction = inference_program(word_dict)
+    train_func_outputs = train_program(prediction)
+    avg_cost = train_func_outputs[0]
+
+    test_program = main_program.clone(for_test=True)
+
+    sgd_optimizer = optimizer_func()
+    sgd_optimizer.minimize(avg_cost)
+    exe = fluid.Executor(place)
+
+    def train_test(program, reader):
+        count = 0
+        feed_var_list = [
+            program.global_block().var(var_name) for var_name in feed_order
+        ]
+        feeder_test = fluid.DataFeeder(feed_list=feed_var_list, place=place)
+        test_exe = fluid.Executor(place)
+        accumulated = len(train_func_outputs) * [0]
+        for test_data in reader():
+            avg_cost_np = test_exe.run(
+                program=program,
+                feed=feeder_test.feed(test_data),
+                fetch_list=train_func_outputs)
+            accumulated = [
+                x[0] + x[1][0] for x in zip(accumulated, avg_cost_np)
+            ]
+            count += 1
+        return [x / count for x in accumulated]
+
+    def train_loop():
+
+        feed_var_list_loop = [
+            main_program.global_block().var(var_name) for var_name in feed_order
+        ]
+        feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place)
+        exe.run(startup_program)
+
+        for epoch_id in range(pass_num):
+            for step_id, data in enumerate(train_reader()):
+                metrics = exe.run(
+                    main_program,
+                    feed=feeder.feed(data),
+                    fetch_list=[var.name for var in train_func_outputs])
+                print("step: {0}, Metrics {1}".format(
+                    step_id, list(map(np.array, metrics))))
+                if (step_id + 1) % 10 == 0:
+                    avg_cost_test, acc_test = train_test(test_program,
+                                                         test_reader)
+                    print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
+                        step_id, avg_cost_test, acc_test))
+
+                    print("Step {0}, Epoch {1} Metrics {2}".format(
+                        step_id, epoch_id, list(map(np.array, metrics))))
+                if math.isnan(float(metrics[0])):
+                    sys.exit("got NaN loss, training failed.")
+        if params_dirname is not None:
+            fluid.io.save_inference_model(params_dirname, ["words"],
+                                          prediction, exe)
+
+    train_loop()
+
+
+def infer(use_cuda, params_dirname=None):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     word_dict = paddle.dataset.imdb.word_dict()

-    inferencer = Inferencer(
-        infer_func=partial(inference_program, word_dict),
-        param_path=params_dirname,
-        place=place)
-
-    # Setup input by creating LoDTensor to represent sequence of words.
-    # Here each word is the basic element of the LoDTensor and the shape of
-    # each word (base_shape) should be [1] since it is simply an index to
-    # look up for the corresponding word vector.
-    # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]],
-    # which has only one lod level. Then the created LoDTensor will have only
-    # one higher level structure (sequence of words, or sentence) than the basic
-    # element (word). Hence the LoDTensor will hold data for three sentences of
-    # length 3, 4 and 2, respectively.
-    # Note that lod info should be a list of lists.
-
-    reviews_str = [
-        'read the book forget the movie', 'this is a great movie',
-        'this is very bad'
-    ]
-    reviews = [c.split() for c in reviews_str]
-
-    UNK = word_dict['<unk>']
-    lod = []
-    for c in reviews:
-        lod.append([word_dict.get(words, UNK) for words in c])
-
-    base_shape = [[len(c) for c in lod]]
-
-    tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
-    results = inferencer.infer({'words': tensor_words})
-
-    for i, r in enumerate(results[0]):
-        print("Predict probability of ", r[0], " to be positive and ", r[1],
-              " to be negative for review \'", reviews_str[i], "\'")
+    exe = fluid.Executor(place)
+
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be fed
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inferencer, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(params_dirname, exe)
+
+        # Setup input by creating LoDTensor to represent sequence of words.
+        # Here each word is the basic element of the LoDTensor and the shape of
+        # each word (base_shape) should be [1] since it is simply an index to
+        # look up for the corresponding word vector.
+        # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]],
+        # which has only one lod level. Then the created LoDTensor will have only
+        # one higher level structure (sequence of words, or sentence) than the basic
+        # element (word). Hence the LoDTensor will hold data for three sentences of
+        # length 3, 4 and 2, respectively.
+        # Note that lod info should be a list of lists.
+        reviews_str = [
+            'read the book forget the movie', 'this is a great movie',
+            'this is very bad'
+        ]
+        reviews = [c.split() for c in reviews_str]
+
+        UNK = word_dict['<unk>']
+        lod = []
+        for c in reviews:
+            lod.append([word_dict.get(words, UNK) for words in c])
+
+        base_shape = [[len(c) for c in lod]]
+
+        tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
+        assert feed_target_names[0] == "words"
+        results = exe.run(
+            inferencer,
+            feed={feed_target_names[0]: tensor_words},
+            fetch_list=fetch_targets,
+            return_numpy=False)
+        np_data = np.array(results[0])
+        for i, r in enumerate(np_data):
+            print("Predict probability of ", r[0], " to be positive and ", r[1],
+                  " to be negative for review \'", reviews_str[i], "\'")


 def main(use_cuda):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
     params_dirname = "understand_sentiment_stacked_lstm.inference.model"
-    train(use_cuda, train_program, params_dirname)
-    infer(use_cuda, inference_program, params_dirname)
+    train(use_cuda, params_dirname)
+    infer(use_cuda, params_dirname)


 if __name__ == '__main__':