Commit 8862db07, authored by liaogang

fix conflict

This diff is collapsed.
@@ -231,7 +231,7 @@ trainer = paddle.trainer.SGD(cost=cost,

Below, `shuffle` is a reader decorator: it takes a reader A and returns another reader B, which reads `buffer_size` training samples into a buffer at a time, shuffles them randomly, and yields them one by one.

`batch` is a special decorator whose input is a reader and whose output is a batched reader. In PaddlePaddle, a reader yields one training sample at a time, while a batched reader yields one minibatch at a time; a short sketch of how the two compose follows after this hunk.

`event_handler_plot` can be used to plot the cost during training, as follows:
......
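For reference, a minimal sketch (not part of the diff) of how the two decorators compose on the MNIST reader used in this chapter; it relies only on `paddle.v2` calls that already appear elsewhere in this commit:

```python
import paddle.v2 as paddle

# reader A: a reader creator that yields one (image, label) sample at a time
train_reader = paddle.dataset.mnist.train()

# reader B: buffers 8192 samples, shuffles them, then yields them one by one
shuffled_reader = paddle.reader.shuffle(train_reader, buf_size=8192)

# batched reader: groups the shuffled samples into minibatches of 128
batched_reader = paddle.batch(shuffled_reader, batch_size=128)

# each item yielded by the batched reader is a list of up to 128 samples
first_minibatch = next(batched_reader())
print len(first_minibatch)
```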
This diff is collapsed.
@@ -273,7 +273,7 @@ trainer = paddle.trainer.SGD(cost=cost,

Below, `shuffle` is a reader decorator: it takes a reader A and returns another reader B, which reads `buffer_size` training samples into a buffer at a time, shuffles them randomly, and yields them one by one.

`batch` is a special decorator whose input is a reader and whose output is a batched reader. In PaddlePaddle, a reader yields one training sample at a time, while a batched reader yields one minibatch at a time.

`event_handler_plot` can be used to plot the cost during training, as follows:
......
@@ -48,35 +48,36 @@ def convolutional_neural_network(img):
    return predict

-paddle.init(use_gpu=False, trainer_count=1)
+def main():
+    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology
    images = paddle.layer.data(
        name='pixel', type=paddle.data_type.dense_vector(784))
    label = paddle.layer.data(
        name='label', type=paddle.data_type.integer_value(10))

    # Here we can build the prediction network in different ways. Please
    # choose one by uncomment corresponding line.
    # predict = softmax_regression(images)
    # predict = multilayer_perceptron(images)
    predict = convolutional_neural_network(images)

    cost = paddle.layer.classification_cost(input=predict, label=label)
    parameters = paddle.parameters.create(cost)

    optimizer = paddle.optimizer.Momentum(
        learning_rate=0.1 / 128.0,
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))

    trainer = paddle.trainer.SGD(
        cost=cost, parameters=parameters, update_equation=optimizer)

    lists = []

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
@@ -88,37 +89,38 @@ def event_handler(event):
            result = trainer.test(reader=paddle.batch(
                paddle.dataset.mnist.test(), batch_size=128))
            print "Test with Pass %d, Cost %f, %s\n" % (
                event.pass_id, result.cost, result.metrics)
            lists.append((event.pass_id, result.cost,
                          result.metrics['classification_error_evaluator']))

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=128),
        event_handler=event_handler,
-        num_passes=1)
+        num_passes=5)

    # find the best pass
    best = sorted(lists, key=lambda list: float(list[1]))[0]
    print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
    print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
    def load_image(file):
        im = Image.open(file).convert('L')
        im = im.resize((28, 28), Image.ANTIALIAS)
        im = np.array(im).astype(np.float32).flatten()
        im = im / 255.0
        return im

    test_data = []
    test_data.append((load_image('image/infer_3.png'), ))

    probs = paddle.infer(
        output_layer=predict, parameters=parameters, input=test_data)
    lab = np.argsort(-probs)  # probs and lab are the results of one batch data
    print "Label of image/infer_3.png is: %d" % lab[0][0]


+if __name__ == '__main__':
+    main()
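As a side note, the commented-out alternatives in the hunk above (`softmax_regression`, `multilayer_perceptron`) are defined in a part of the file this diff does not show. For orientation only, a minimal sketch of what such a `softmax_regression` helper typically looks like with the paddle.v2 layer API used above; treat it as an assumption, not the file's actual definition:

```python
def softmax_regression(img):
    # one fully connected layer mapping the 784-dim image vector
    # to a softmax distribution over the 10 digit classes
    predict = paddle.layer.fc(
        input=img, size=10, act=paddle.activation.Softmax())
    return predict
```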
@@ -211,6 +211,7 @@ Here we fetch the dictionary, and print its size:
```python
import math
import numpy as np
+import gzip
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
@@ -373,11 +374,11 @@ crf_cost = paddle.layer.crf(
```python
crf_dec = paddle.layer.crf_decoding(
-    name='crf_dec_l',
    size=label_dict_len,
    input=feature_out,
    label=target,
    param_attr=paddle.attr.Param(name='crfw'))
+evaluator.sum(input=crf_dec)
```
## Train model

@@ -387,7 +388,7 @@ crf_dec = paddle.layer.crf_decoding(
All the necessary parameters will be traced and created automatically, given the output layers we need to use.

```python
-parameters = paddle.parameters.create([crf_cost, crf_dec])
+parameters = paddle.parameters.create(crf_cost)
```

We can print out the parameter names; a name is generated automatically if it was not specified in the configuration.
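For instance, a quick way to inspect them is the minimal sketch below; it assumes the dict-like `paddle.v2` Parameters API and is not part of this diff:

```python
# Parameters behaves like a dict keyed by parameter name
print parameters.keys()              # includes the explicitly named 'crfw' and 'emb'
print parameters.get('crfw').shape   # shape of the CRF weight parameter
```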
@@ -420,7 +421,8 @@ optimizer = paddle.optimizer.Momentum(
trainer = paddle.trainer.SGD(cost=crf_cost,
                             parameters=parameters,
-                            update_equation=optimizer)
+                            update_equation=optimizer,
+                            extra_layers=crf_dec)
```
### Trainer

@@ -455,8 +457,19 @@ feeding = {
def event_handler(event):
    if isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 100 == 0:
-            print "Pass %d, Batch %d, Cost %f" % (
-                event.pass_id, event.batch_id, event.cost)
+            print "Pass %d, Batch %d, Cost %f, %s" % (
+                event.pass_id, event.batch_id, event.cost, event.metrics)
+        if event.batch_id % 1000 == 0:
+            result = trainer.test(reader=reader, feeding=feeding)
+            print "\nTest with Pass %d, Batch %d, %s" % (event.pass_id, event.batch_id, result.metrics)
+    if isinstance(event, paddle.event.EndPass):
+        # save parameters
+        with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
+            parameters.to_tar(f)
+        result = trainer.test(reader=reader, feeding=feeding)
+        print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
```
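The handler above writes a gzipped parameter archive at the end of every pass. As a side note not covered by this diff, such an archive can later be restored with the matching `from_tar` call; a minimal sketch, assuming the `paddle.v2` Parameters API:

```python
import gzip
import paddle.v2 as paddle

# restore the parameters saved after, e.g., pass 0
with gzip.open('params_pass_0.tar.gz', 'r') as f:
    parameters = paddle.parameters.Parameters.from_tar(f)
```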
`trainer.train` will train the model.

@@ -469,6 +482,42 @@ trainer.train(
    feeding=feeding)
```
### Application

After training is done, we need to select an optimal model based on a performance metric we care about and use it for inference. For this task, one can simply pick the model with the fewest tagging errors on the test set. The `paddle.layer.crf_decoding` layer is used for inference; unlike in training, its input does not include the ground-truth label.

```python
predict = paddle.layer.crf_decoding(
    size=label_dict_len,
    input=feature_out,
    param_attr=paddle.attr.Param(name='crfw'))
```
Here we take one sample from the test set as an example.

```python
test_creator = paddle.dataset.conll05.test()
test_data = []
for item in test_creator():
    test_data.append(item[0:8])
    if len(test_data) == 1:
        break
```

The inference interface `paddle.infer` returns the indices of the predicted labels; we then look them up in the reversed dictionary `labels_reverse` and print the resulting tags.

```python
labs = paddle.infer(
    output_layer=predict, parameters=parameters, input=test_data, field='id')
assert len(labs) == len(test_data[0][0])

labels_reverse = {}
for (k, v) in label_dict.items():
    labels_reverse[v] = k
pre_lab = [labels_reverse[i] for i in labs]
print pre_lab
```
## Conclusion

Semantic Role Labeling is an important intermediate step in a wide range of natural language processing tasks. In this tutorial, we use SRL as an example to illustrate how to do sequence tagging tasks with PaddlePaddle. The models proposed are from our published paper\[[10](#Reference)\]. We only use test data for illustration since the training data of the CoNLL 2005 dataset is not completely public. The goal is an end-to-end neural network model with fewer dependencies on natural language processing tools that is comparable to, or even better than, traditional models in terms of performance. Please check out our paper for more information and discussions.
......
@@ -189,6 +189,7 @@ conll05st-release/
```python
import math
import numpy as np
+import gzip
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
@@ -350,11 +351,11 @@ crf_cost = paddle.layer.crf(
```python
crf_dec = paddle.layer.crf_decoding(
-    name='crf_dec_l',
    size=label_dict_len,
    input=feature_out,
    label=target,
    param_attr=paddle.attr.Param(name='crfw'))
+evaluator.sum(input=crf_dec)
```
## Train the model

@@ -365,7 +366,7 @@ crf_dec = paddle.layer.crf_decoding(
```python
# create parameters
-parameters = paddle.parameters.create([crf_cost, crf_dec])
+parameters = paddle.parameters.create(crf_cost)
```

The parameter names can be printed; if a name was not specified in the network configuration, one is generated by default.
@@ -400,7 +401,8 @@ optimizer = paddle.optimizer.Momentum(
trainer = paddle.trainer.SGD(cost=crf_cost,
                             parameters=parameters,
-                            update_equation=optimizer)
+                            update_equation=optimizer,
+                            extra_layers=crf_dec)
```
### Training

@@ -436,8 +438,19 @@ feeding = {
def event_handler(event):
    if isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 100 == 0:
-            print "Pass %d, Batch %d, Cost %f" % (
-                event.pass_id, event.batch_id, event.cost)
+            print "Pass %d, Batch %d, Cost %f, %s" % (
+                event.pass_id, event.batch_id, event.cost, event.metrics)
+        if event.batch_id % 1000 == 0:
+            result = trainer.test(reader=reader, feeding=feeding)
+            print "\nTest with Pass %d, Batch %d, %s" % (event.pass_id, event.batch_id, result.metrics)
+    if isinstance(event, paddle.event.EndPass):
+        # save parameters
+        with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
+            parameters.to_tar(f)
+        result = trainer.test(reader=reader, feeding=feeding)
+        print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
```
Train with the `trainer.train` function:

@@ -450,6 +463,41 @@ trainer.train(
    feeding=feeding)
```
### Applying the model

After training, we need to pick the best model according to a performance metric we care about and use it for prediction; a simple choice is the model with the fewest tagging errors on the test set. Prediction uses `paddle.layer.crf_decoding`; unlike in training, this layer takes no ground-truth label as input, as shown below:

```python
predict = paddle.layer.crf_decoding(
    size=label_dict_len,
    input=feature_out,
    param_attr=paddle.attr.Param(name='crfw'))
```
Here we take one sample from the test set as an example.

```python
test_creator = paddle.dataset.conll05.test()
test_data = []
for item in test_creator():
    test_data.append(item[0:8])
    if len(test_data) == 1:
        break
```

The inference interface `paddle.infer` returns the label indices; we look them up in the reversed dictionary `labels_reverse` and print the resulting tags.

```python
labs = paddle.infer(
    output_layer=predict, parameters=parameters, input=test_data, field='id')
assert len(labs) == len(test_data[0][0])

labels_reverse = {}
for (k, v) in label_dict.items():
    labels_reverse[v] = k
pre_lab = [labels_reverse[i] for i in labs]
print pre_lab
```
## Summary

Semantic role labeling is an important intermediate step in many natural language understanding tasks. In this tutorial we take the SRL task as an example to show how to do sequence tagging with PaddlePaddle. The model introduced here comes from our published paper\[[10](#参考文献)\]. Since the training data of the CoNLL 2005 SRL task is not yet fully open, the tutorial uses only the test data as an example. In doing so, we aim to reduce the dependence on other natural language processing tools and, by exploiting the data-driven, end-to-end learning ability of neural networks, obtain a model that is comparable to, or even better than, traditional methods; the paper confirms that this is possible. More information about and discussion of the model can be found in the paper.
......
import math
import numpy as np
+import gzip
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
+import paddle.v2.evaluator as evaluator

+word_dict, verb_dict, label_dict = conll05.get_dict()
+word_dict_len = len(word_dict)
+label_dict_len = len(label_dict)
+pred_len = len(verb_dict)
+mark_dict_len = 2
+word_dim = 32
+mark_dim = 5
+hidden_dim = 512
+depth = 8
+default_std = 1 / math.sqrt(hidden_dim) / 3.0
+mix_hidden_lr = 1e-3

-def db_lstm():
-    word_dict, verb_dict, label_dict = conll05.get_dict()
-    word_dict_len = len(word_dict)
-    label_dict_len = len(label_dict)
-    pred_len = len(verb_dict)
-    mark_dict_len = 2
-    word_dim = 32
-    mark_dim = 5
-    hidden_dim = 512
-    depth = 8
-    #8 features
-    def d_type(size):
+def d_type(size):
    return paddle.data_type.integer_value_sequence(size)

+def db_lstm():
+    #8 features
    word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
    predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
@@ -30,11 +35,8 @@ def db_lstm():
    ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
-    target = paddle.layer.data(name='target', type=d_type(label_dict_len))
    emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True)
    std_0 = paddle.attr.Param(initial_std=0.)
-    default_std = 1 / math.sqrt(hidden_dim) / 3.0
    std_default = paddle.attr.Param(initial_std=default_std)
    predicate_embedding = paddle.layer.embedding(
@@ -60,7 +62,6 @@ def db_lstm():
        input=emb, param_attr=std_default) for emb in emb_layers
    ])
-    mix_hidden_lr = 1e-3
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)
@@ -108,21 +109,7 @@ def db_lstm():
            input=input_tmp[1], param_attr=lstm_para_attr)
        ], )

-    crf_cost = paddle.layer.crf(
-        size=label_dict_len,
-        input=feature_out,
-        label=target,
-        param_attr=paddle.attr.Param(
-            name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))
-    crf_dec = paddle.layer.crf_decoding(
-        name='crf_dec_l',
-        size=label_dict_len,
-        input=feature_out,
-        label=target,
-        param_attr=paddle.attr.Param(name='crfw'))
-    return crf_cost, crf_dec
+    return feature_out


def load_parameter(file_name, h, w):
@@ -135,10 +122,24 @@ def main():
    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology
-    crf_cost, crf_dec = db_lstm()
+    feature_out = db_lstm()
+    target = paddle.layer.data(name='target', type=d_type(label_dict_len))
+    crf_cost = paddle.layer.crf(
+        size=label_dict_len,
+        input=feature_out,
+        label=target,
+        param_attr=paddle.attr.Param(
+            name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))
+    crf_dec = paddle.layer.crf_decoding(
+        size=label_dict_len,
+        input=feature_out,
+        label=target,
+        param_attr=paddle.attr.Param(name='crfw'))
+    evaluator.sum(input=crf_dec)

    # create parameters
-    parameters = paddle.parameters.create([crf_cost, crf_dec])
+    parameters = paddle.parameters.create(crf_cost)
    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

    # create optimizer
@@ -150,7 +151,10 @@ def main():
            average_window=0.5, max_average_window=10000), )

    trainer = paddle.trainer.SGD(
-        cost=crf_cost, parameters=parameters, update_equation=optimizer)
+        cost=crf_cost,
+        parameters=parameters,
+        update_equation=optimizer,
+        extra_layers=crf_dec)

    reader = paddle.batch(
        paddle.reader.shuffle(conll05.test(), buf_size=8192), batch_size=10)
@@ -170,15 +174,50 @@ def main():
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
-                print "Pass %d, Batch %d, Cost %f" % (
-                    event.pass_id, event.batch_id, event.cost)
+                print "Pass %d, Batch %d, Cost %f, %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics)
+            if event.batch_id % 1000 == 0:
+                result = trainer.test(reader=reader, feeding=feeding)
+                print "\nTest with Pass %d, Batch %d, %s" % (
+                    event.pass_id, event.batch_id, result.metrics)
+        if isinstance(event, paddle.event.EndPass):
+            # save parameters
+            with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
+                parameters.to_tar(f)
+            result = trainer.test(reader=reader, feeding=feeding)
+            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    trainer.train(
        reader=reader,
        event_handler=event_handler,
-        num_passes=10000,
+        num_passes=1,
        feeding=feeding)

+    test_creator = paddle.dataset.conll05.test()
+    test_data = []
+    for item in test_creator():
+        test_data.append(item[0:8])
+        if len(test_data) == 1:
+            break
+    predict = paddle.layer.crf_decoding(
+        size=label_dict_len,
+        input=feature_out,
+        param_attr=paddle.attr.Param(name='crfw'))
+    probs = paddle.infer(
+        output_layer=predict,
+        parameters=parameters,
+        input=test_data,
+        field='id')
+    assert len(probs) == len(test_data[0][0])
+    labels_reverse = {}
+    for (k, v) in label_dict.items():
+        labels_reverse[v] = k
+    pre_lab = [labels_reverse[i] for i in probs]
+    print pre_lab


if __name__ == '__main__':
    main()
@@ -238,6 +238,7 @@ mov_categories = paddle.layer.data(
        len(paddle.dataset.movielens.movie_categories())))
mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)

+movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
mov_title_id = paddle.layer.data(
    name='movie_title',
    type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
......
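The added line makes `movie_title_dict` available before `len(movie_title_dict)` is used two lines later. For orientation only, a minimal sketch of what it provides, assuming the `paddle.dataset.movielens` helper shown in the hunk (not part of the diff):

```python
import paddle.v2 as paddle

# a word-to-id mapping over the words appearing in movie titles; its size is
# the vocabulary size passed to integer_value_sequence in the snippet above
movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
print len(movie_title_dict)
```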
@@ -244,6 +244,7 @@ mov_categories = paddle.layer.data(
        len(paddle.dataset.movielens.movie_categories())))
mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)

+movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
mov_title_id = paddle.layer.data(
    name='movie_title',
    type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
......
@@ -280,6 +280,7 @@ mov_categories = paddle.layer.data(
        len(paddle.dataset.movielens.movie_categories())))
mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)

+movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
mov_title_id = paddle.layer.data(
    name='movie_title',
    type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
......
@@ -286,6 +286,7 @@ mov_categories = paddle.layer.data(
        len(paddle.dataset.movielens.movie_categories())))
mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)

+movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
mov_title_id = paddle.layer.data(
    name='movie_title',
    type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
......
@@ -3,9 +3,7 @@ import cPickle
import copy

-def main():
-    paddle.init(use_gpu=False)
-    movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
+def get_usr_combined_features():
    uid = paddle.layer.data(
        name='user_id',
        type=paddle.data_type.integer_value(
@@ -36,7 +34,11 @@ def main():
        input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc],
        size=200,
        act=paddle.activation.Tanh())
+    return usr_combined_features

+def get_mov_combined_features():
+    movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
    mov_id = paddle.layer.data(
        name='movie_id',
        type=paddle.data_type.integer_value(
@@ -61,7 +63,13 @@ def main():
        input=[mov_fc, mov_categories_hidden, mov_title_conv],
        size=200,
        act=paddle.activation.Tanh())
+    return mov_combined_features

+def main():
+    paddle.init(use_gpu=False)
+    usr_combined_features = get_usr_combined_features()
+    mov_combined_features = get_mov_combined_features()

    inference = paddle.layer.cos_sim(
        a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
    cost = paddle.layer.mse_cost(
......