Unverified · Commit 343baa60 authored by Yibing Liu, committed by GitHub

Upgrade w2v & srl's api (#828)

* Upgrade w2v & srl's api

* Upgrade label semantic roles api
Parent 729f0c95
@@ -262,32 +262,32 @@ dict_size = len(word_dict)
```
A larger `BATCH_SIZE` makes training converge faster, but it also consumes more memory. Since the word-vector computation is large, enable GPU training if your environment allows; you will get results faster.
Unlike the earlier PaddlePaddle v2 release, the new Fluid version does not require us to compute word embeddings by hand. PaddlePaddle provides the built-in method `fluid.layers.embedding`, which we can use directly to build the N-gram neural network.
Unlike the earlier PaddlePaddle v2 release, the new Fluid version does not require us to compute word embeddings by hand. PaddlePaddle provides the built-in method `fluid.embedding`, which we can use directly to build the N-gram neural network.
- Let's define the N-gram neural network structure, which is used in both training and prediction. Because word-vector gradients are sparse, we pass the parameter `is_sparse == True` to speed up updates of the sparse parameters.
```python
def inference_program(words, is_sparse):
embed_first = fluid.layers.embedding(
embed_first = fluid.embedding(
input=words[0],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_second = fluid.layers.embedding(
embed_second = fluid.embedding(
input=words[1],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_third = fluid.layers.embedding(
embed_third = fluid.embedding(
input=words[2],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_fourth = fluid.layers.embedding(
embed_fourth = fluid.embedding(
input=words[3],
size=[dict_size, EMBED_SIZE],
dtype='float32',
@@ -310,7 +310,7 @@ def train_program(predict_word):
# The definition of 'next_word' must come after the declaration of inference_program;
# otherwise the train program's input order would become [next_word, firstw, secondw,
# thirdw, fourthw], which is incorrect.
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = fluid.layers.mean(cost)
return avg_cost
@@ -335,11 +335,11 @@ def train(if_use_cuda, params_dirname, is_sparse=True):
test_reader = paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE)
first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
forth_word = fluid.layers.data(name='fourthw', shape=[1], dtype='int64')
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
first_word = fluid.data(name='firstw', shape=[None, 1], dtype='int64')
second_word = fluid.data(name='secondw', shape=[None, 1], dtype='int64')
third_word = fluid.data(name='thirdw', shape=[None, 1], dtype='int64')
forth_word = fluid.data(name='fourthw', shape=[None, 1], dtype='int64')
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
word_list = [first_word, second_word, third_word, forth_word, next_word]
feed_order = ['firstw', 'secondw', 'thirdw', 'fourthw', 'nextw']
......
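Before the English copy of the same change, it may help to spell out the pattern that repeats throughout this commit. A minimal before/after sketch of the data-layer migration (the variable name is illustrative):

```python
import paddle.fluid as fluid

# Before (old API): the batch dimension was implicit.
#   first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')

# After (new API): the batch dimension is written explicitly, and
# None marks it as variable-sized.
first_word = fluid.data(name='firstw', shape=[None, 1], dtype='int64')
```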
@@ -227,32 +227,32 @@ dict_size = len(word_dict)
```
A larger `BATCH_SIZE` makes training converge faster, but it also consumes more memory. Since the word-vector computation is large, enable GPU training if your environment allows; you will get results faster.
Unlike the earlier PaddlePaddle v2 release, the new Fluid version does not require us to compute word embeddings by hand. PaddlePaddle provides the built-in method `fluid.layers.embedding`, which we can use directly to build the N-gram neural network.
Unlike the earlier PaddlePaddle v2 release, the new Fluid version does not require us to compute word embeddings by hand. PaddlePaddle provides the built-in method `fluid.embedding`, which we can use directly to build the N-gram neural network.
- Let's define the N-gram neural network structure, which is used in both training and prediction. Because word-vector gradients are sparse, we pass the parameter `is_sparse == True` to speed up updates of the sparse parameters.
```python
def inference_program(words, is_sparse):
embed_first = fluid.layers.embedding(
embed_first = fluid.embedding(
input=words[0],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_second = fluid.layers.embedding(
embed_second = fluid.embedding(
input=words[1],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_third = fluid.layers.embedding(
embed_third = fluid.embedding(
input=words[2],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_fourth = fluid.layers.embedding(
embed_fourth = fluid.embedding(
input=words[3],
size=[dict_size, EMBED_SIZE],
dtype='float32',
@@ -275,7 +275,7 @@ def train_program(predict_word):
# The definition of 'next_word' must come after the declaration of inference_program;
# otherwise the train program's input order would become [next_word, firstw, secondw,
# thirdw, fourthw], which is incorrect.
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = fluid.layers.mean(cost)
return avg_cost
@@ -300,11 +300,11 @@ def train(if_use_cuda, params_dirname, is_sparse=True):
test_reader = paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE)
first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
forth_word = fluid.layers.data(name='fourthw', shape=[1], dtype='int64')
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
first_word = fluid.data(name='firstw', shape=[None, 1], dtype='int64')
second_word = fluid.data(name='secondw', shape=[None, 1], dtype='int64')
third_word = fluid.data(name='thirdw', shape=[None, 1], dtype='int64')
forth_word = fluid.data(name='fourthw', shape=[None, 1], dtype='int64')
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
word_list = [first_word, second_word, third_word, forth_word, next_word]
feed_order = ['firstw', 'secondw', 'thirdw', 'fourthw', 'nextw']
......
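A hedged sketch of the weight sharing the N-gram snippet above relies on: because every lookup passes the same parameter name `shared_w`, all four context words index a single embedding table (the sizes below are illustrative):

```python
import paddle.fluid as fluid

dict_size, EMBED_SIZE, HIDDEN_SIZE = 1500, 32, 256  # illustrative sizes

words = [fluid.data(name=n, shape=[None, 1], dtype='int64')
         for n in ('firstw', 'secondw', 'thirdw', 'fourthw')]

# Naming the same parameter in each call makes the lookups share one table.
embeds = [fluid.embedding(input=w,
                          size=[dict_size, EMBED_SIZE],
                          dtype='float32',
                          param_attr='shared_w')
          for w in words]

# Concatenate the four context embeddings and project to a hidden layer.
concat_embed = fluid.layers.concat(input=embeds, axis=1)
hidden = fluid.layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid')
```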
@@ -304,32 +304,32 @@ dict_size = len(word_dict)
```
A larger `BATCH_SIZE` makes training converge faster, but it also consumes more memory. Since the word-vector computation is large, enable GPU training if your environment allows; you will get results faster.
Unlike the earlier PaddlePaddle v2 release, the new Fluid version does not require us to compute word embeddings by hand. PaddlePaddle provides the built-in method `fluid.layers.embedding`, which we can use directly to build the N-gram neural network.
Unlike the earlier PaddlePaddle v2 release, the new Fluid version does not require us to compute word embeddings by hand. PaddlePaddle provides the built-in method `fluid.embedding`, which we can use directly to build the N-gram neural network.
- Let's define the N-gram neural network structure, which is used in both training and prediction. Because word-vector gradients are sparse, we pass the parameter `is_sparse == True` to speed up updates of the sparse parameters.
```python
def inference_program(words, is_sparse):
embed_first = fluid.layers.embedding(
embed_first = fluid.embedding(
input=words[0],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_second = fluid.layers.embedding(
embed_second = fluid.embedding(
input=words[1],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_third = fluid.layers.embedding(
embed_third = fluid.embedding(
input=words[2],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_fourth = fluid.layers.embedding(
embed_fourth = fluid.embedding(
input=words[3],
size=[dict_size, EMBED_SIZE],
dtype='float32',
@@ -352,7 +352,7 @@ def train_program(predict_word):
# The definition of 'next_word' must come after the declaration of inference_program;
# otherwise the train program's input order would become [next_word, firstw, secondw,
# thirdw, fourthw], which is incorrect.
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = fluid.layers.mean(cost)
return avg_cost
@@ -377,11 +377,11 @@ def train(if_use_cuda, params_dirname, is_sparse=True):
test_reader = paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE)
first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
forth_word = fluid.layers.data(name='fourthw', shape=[1], dtype='int64')
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
first_word = fluid.data(name='firstw', shape=[None, 1], dtype='int64')
second_word = fluid.data(name='secondw', shape=[None, 1], dtype='int64')
third_word = fluid.data(name='thirdw', shape=[None, 1], dtype='int64')
forth_word = fluid.data(name='fourthw', shape=[None, 1], dtype='int64')
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
word_list = [first_word, second_word, third_word, forth_word, next_word]
feed_order = ['firstw', 'secondw', 'thirdw', 'fourthw', 'nextw']
......
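A short sketch of the shape contract behind the cross-entropy loss in `train_program` above; the hidden layer here is a stand-in for the network's last layer, and the sizes are illustrative:

```python
import paddle.fluid as fluid

dict_size = 1500  # illustrative
hidden = fluid.data(name='hidden', shape=[None, 256], dtype='float32')
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')

# predict_word holds [batch, dict_size] class probabilities (softmax);
# cross_entropy pairs them with int64 labels of shape [batch, 1].
predict_word = fluid.layers.fc(input=hidden, size=dict_size, act='softmax')
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = fluid.layers.mean(cost)
```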
@@ -269,32 +269,32 @@ dict_size = len(word_dict)
```
A larger `BATCH_SIZE` makes training converge faster, but it also consumes more memory. Since the word-vector computation is large, enable GPU training if your environment allows; you will get results faster.
Unlike the earlier PaddlePaddle v2 release, the new Fluid version does not require us to compute word embeddings by hand. PaddlePaddle provides the built-in method `fluid.layers.embedding`, which we can use directly to build the N-gram neural network.
Unlike the earlier PaddlePaddle v2 release, the new Fluid version does not require us to compute word embeddings by hand. PaddlePaddle provides the built-in method `fluid.embedding`, which we can use directly to build the N-gram neural network.
- Let's define the N-gram neural network structure, which is used in both training and prediction. Because word-vector gradients are sparse, we pass the parameter `is_sparse == True` to speed up updates of the sparse parameters.
```python
def inference_program(words, is_sparse):
embed_first = fluid.layers.embedding(
embed_first = fluid.embedding(
input=words[0],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_second = fluid.layers.embedding(
embed_second = fluid.embedding(
input=words[1],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_third = fluid.layers.embedding(
embed_third = fluid.embedding(
input=words[2],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_fourth = fluid.layers.embedding(
embed_fourth = fluid.embedding(
input=words[3],
size=[dict_size, EMBED_SIZE],
dtype='float32',
@@ -317,7 +317,7 @@ def train_program(predict_word):
# The definition of 'next_word' must come after the declaration of inference_program;
# otherwise the train program's input order would become [next_word, firstw, secondw,
# thirdw, fourthw], which is incorrect.
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = fluid.layers.mean(cost)
return avg_cost
@@ -342,11 +342,11 @@ def train(if_use_cuda, params_dirname, is_sparse=True):
test_reader = paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE)
first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
forth_word = fluid.layers.data(name='fourthw', shape=[1], dtype='int64')
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
first_word = fluid.data(name='firstw', shape=[None, 1], dtype='int64')
second_word = fluid.data(name='secondw', shape=[None, 1], dtype='int64')
third_word = fluid.data(name='thirdw', shape=[None, 1], dtype='int64')
forth_word = fluid.data(name='fourthw', shape=[None, 1], dtype='int64')
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
word_list = [first_word, second_word, third_word, forth_word, next_word]
feed_order = ['firstw', 'secondw', 'thirdw', 'fourthw', 'nextw']
......
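For orientation, a sketch of how `feed_order` connects reader samples to the named inputs above; `fluid.DataFeeder` matches each column of a sample to one data layer, in order:

```python
import paddle
import paddle.fluid as fluid

N, BATCH_SIZE = 5, 100
word_dict = paddle.dataset.imikolov.build_dict()
train_reader = paddle.batch(
    paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)

names = ['firstw', 'secondw', 'thirdw', 'fourthw', 'nextw']
word_list = [fluid.data(name=n, shape=[None, 1], dtype='int64')
             for n in names]

place = fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=word_list, place=place)

for data in train_reader():
    feed_dict = feeder.feed(data)  # {'firstw': LoDTensor, ..., 'nextw': ...}
    break
```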
@@ -45,25 +45,25 @@ def parse_args():
def inference_program(words, is_sparse):
embed_first = fluid.layers.embedding(
embed_first = fluid.embedding(
input=words[0],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_second = fluid.layers.embedding(
embed_second = fluid.embedding(
input=words[1],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_third = fluid.layers.embedding(
embed_third = fluid.embedding(
input=words[2],
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_fourth = fluid.layers.embedding(
embed_fourth = fluid.embedding(
input=words[3],
size=[dict_size, EMBED_SIZE],
dtype='float32',
@@ -82,7 +82,7 @@ def train_program(predict_word):
# The declaration of 'next_word' must come after the invocation of inference_program,
# or the data input order of train program would be [next_word, firstw, secondw,
# thirdw, fourthw], which is not correct.
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = fluid.layers.mean(cost)
return avg_cost
@@ -102,11 +102,11 @@ def train(if_use_cuda, params_dirname, is_sparse=True):
test_reader = paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE)
first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
forth_word = fluid.layers.data(name='fourthw', shape=[1], dtype='int64')
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
first_word = fluid.data(name='firstw', shape=[None, 1], dtype='int64')
second_word = fluid.data(name='secondw', shape=[None, 1], dtype='int64')
third_word = fluid.data(name='thirdw', shape=[None, 1], dtype='int64')
forth_word = fluid.data(name='fourthw', shape=[None, 1], dtype='int64')
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
word_list = [first_word, second_word, third_word, forth_word, next_word]
feed_order = ['firstw', 'secondw', 'thirdw', 'fourthw', 'nextw']
......
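To round off the word2vec diffs, a minimal training-loop sketch under the upgraded API. It assumes `avg_cost`, `train_reader`, and `feeder` were built as in the snippets above; the learning rate and epoch count are illustrative:

```python
import paddle.fluid as fluid

# Assumes avg_cost, train_reader and feeder exist as in the snippets above.
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())  # initialize parameters once

for epoch in range(3):
    for data in train_reader():
        loss_value, = exe.run(fluid.default_main_program(),
                              feed=feeder.feed(data),
                              fetch_list=[avg_cost])
```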
@@ -270,42 +270,42 @@ is_local = True
```python
# Sentence sequence
word = fluid.layers.data(
name='word_data', shape=[1], dtype='int64', lod_level=1)
word = fluid.data(
name='word_data', shape=[None, 1], dtype='int64', lod_level=1)
# Predicate
predicate = fluid.layers.data(
name='verb_data', shape=[1], dtype='int64', lod_level=1)
predicate = fluid.data(
name='verb_data', shape=[None, 1], dtype='int64', lod_level=1)
# 5 features of the predicate context
ctx_n2 = fluid.layers.data(
name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n1 = fluid.layers.data(
name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
ctx_0 = fluid.layers.data(
name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
ctx_p1 = fluid.layers.data(
name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
ctx_p2 = fluid.layers.data(
name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n2 = fluid.data(
name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_n1 = fluid.data(
name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_0 = fluid.data(
name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_p1 = fluid.data(
name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_p2 = fluid.data(
name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1)
# Predicate context region marker
mark = fluid.layers.data(
name='mark_data', shape=[1], dtype='int64', lod_level=1)
mark = fluid.data(
name='mark_data', shape=[None, 1], dtype='int64', lod_level=1)
```
### Defining the network structure
First, pre-train and define the model's input layer.
```python
# Pre-train the predicate and the predicate context region marker
predicate_embedding = fluid.layers.embedding(
predicate_embedding = fluid.embedding(
input=predicate,
size=[pred_dict_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr='vemb')
mark_embedding = fluid.layers.embedding(
mark_embedding = fluid.embedding(
input=mark,
size=[mark_dict_len, mark_dim],
dtype='float32',
@@ -316,7 +316,7 @@ word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
# Since the word vectors are pre-trained, the embedding table is not trained again here;
# setting the parameter attribute trainable to False prevents the table from being updated during training
emb_layers = [
fluid.layers.embedding(
fluid.embedding(
size=[word_dict_len, word_dim],
input=x,
param_attr=fluid.ParamAttr(
@@ -374,8 +374,8 @@ feature_out = fluid.layers.sums(input=[
])
# Tag sequence
target = fluid.layers.data(
name='target', shape=[1], dtype='int64', lod_level=1)
target = fluid.data(
name='target', shape=[None, 1], dtype='int64', lod_level=1)
# Learn the CRF transition features
crf_cost = fluid.layers.linear_chain_crf(
......
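Every SRL input above declares `lod_level=1`, marking it as a variable-length sequence. A hedged sketch of what feeding one such input looks like (random ids, illustrative lengths):

```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()

# A batch of two sentences of lengths 3 and 5 (8 token ids in total).
# The recursive sequence lengths [[3, 5]] carry the per-sentence
# boundaries that a lod_level=1 input such as 'word_data' expects.
ids = np.random.randint(0, 100, size=(8, 1)).astype('int64')
word_tensor = fluid.create_lod_tensor(ids, [[3, 5]], place)
```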
@@ -252,42 +252,42 @@ Defines the format of the model input features, including the sentence sequence,
```python
# Sentence sequences
word = fluid.layers.data(
name='word_data', shape=[1], dtype='int64', lod_level=1)
word = fluid.data(
name='word_data', shape=[None, 1], dtype='int64', lod_level=1)
# predicate
predicate = fluid.layers.data(
name='verb_data', shape=[1], dtype='int64', lod_level=1)
predicate = fluid.data(
name='verb_data', shape=[None, 1], dtype='int64', lod_level=1)
# 5 features of the predicate context
ctx_n2 = fluid.layers.data(
name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n1 = fluid.layers.data(
name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
ctx_0 = fluid.layers.data(
name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
ctx_p1 = fluid.layers.data(
name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
ctx_p2 = fluid.layers.data(
name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n2 = fluid.data(
name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_n1 = fluid.data(
name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_0 = fluid.data(
name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_p1 = fluid.data(
name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_p2 = fluid.data(
name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1)
# Predicate context region marker
mark = fluid.layers.data(
name='mark_data', shape=[1], dtype='int64', lod_level=1)
mark = fluid.data(
name='mark_data', shape=[None, 1], dtype='int64', lod_level=1)
```
### Defining the network structure
First, pre-train and define the model's input layer.
```python
# Pre-train the predicate and the predicate context region marker
predicate_embedding = fluid.layers.embedding(
predicate_embedding = fluid.embedding(
input=predicate,
size=[pred_dict_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr='vemb')
mark_embedding = fluid.layers.embedding(
mark_embedding = fluid.embedding(
input=mark,
size=[mark_dict_len, mark_dim],
dtype='float32',
@@ -298,7 +298,7 @@ word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
# Since the word vectors are pre-trained, the embedding table is not trained again here;
# setting the parameter attribute trainable to False prevents the table from being updated during training
emb_layers = [
fluid.layers.embedding(
fluid.embedding(
size=[word_dict_len, word_dim],
input=x,
param_attr=fluid.ParamAttr(
@@ -356,7 +356,7 @@ feature_out = fluid.layers.sums(input=[
])
# tag/label sequence
target = fluid.layers.data(
target = fluid.data(
name='target', shape=[1], dtype='int64', lod_level=1)
# Learning CRF transfer features
......
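The hunks above stop just before the CRF cost is used, so here is a hedged sketch of how `feature_out` and `target` feed the CRF layers in this chapter; the shared parameter name `crfw` ties training and decoding together, and the sizes are illustrative:

```python
import paddle.fluid as fluid

label_dict_len = 59  # illustrative

# Stand-ins for the emission scores and gold tag sequence defined above.
feature_out = fluid.data(name='feature_out', shape=[None, label_dict_len],
                         dtype='float32', lod_level=1)
target = fluid.data(name='target', shape=[None, 1],
                    dtype='int64', lod_level=1)

# Training: the CRF log-likelihood learns the transition features.
crf_cost = fluid.layers.linear_chain_crf(
    input=feature_out, label=target,
    param_attr=fluid.ParamAttr(name='crfw'))
avg_cost = fluid.layers.mean(crf_cost)

# Inference: Viterbi decoding reuses the same transition weights.
crf_decode = fluid.layers.crf_decoding(
    input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
```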
@@ -312,42 +312,42 @@ is_local = True
```python
# Sentence sequence
word = fluid.layers.data(
name='word_data', shape=[1], dtype='int64', lod_level=1)
word = fluid.data(
name='word_data', shape=[None, 1], dtype='int64', lod_level=1)
# Predicate
predicate = fluid.layers.data(
name='verb_data', shape=[1], dtype='int64', lod_level=1)
predicate = fluid.data(
name='verb_data', shape=[None, 1], dtype='int64', lod_level=1)
# 5 features of the predicate context
ctx_n2 = fluid.layers.data(
name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n1 = fluid.layers.data(
name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
ctx_0 = fluid.layers.data(
name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
ctx_p1 = fluid.layers.data(
name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
ctx_p2 = fluid.layers.data(
name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n2 = fluid.data(
name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_n1 = fluid.data(
name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_0 = fluid.data(
name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_p1 = fluid.data(
name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_p2 = fluid.data(
name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1)
# Predicate context region marker
mark = fluid.layers.data(
name='mark_data', shape=[1], dtype='int64', lod_level=1)
mark = fluid.data(
name='mark_data', shape=[None, 1], dtype='int64', lod_level=1)
```
### Defining the network structure
First, pre-train and define the model's input layer.
```python
# Pre-train the predicate and the predicate context region marker
predicate_embedding = fluid.layers.embedding(
predicate_embedding = fluid.embedding(
input=predicate,
size=[pred_dict_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr='vemb')
mark_embedding = fluid.layers.embedding(
mark_embedding = fluid.embedding(
input=mark,
size=[mark_dict_len, mark_dim],
dtype='float32',
@@ -358,7 +358,7 @@ word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
# Since the word vectors are pre-trained, the embedding table is not trained again here;
# setting the parameter attribute trainable to False prevents the table from being updated during training
emb_layers = [
fluid.layers.embedding(
fluid.embedding(
size=[word_dict_len, word_dim],
input=x,
param_attr=fluid.ParamAttr(
@@ -416,8 +416,8 @@ feature_out = fluid.layers.sums(input=[
])
# Tag sequence
target = fluid.layers.data(
name='target', shape=[1], dtype='int64', lod_level=1)
target = fluid.data(
name='target', shape=[None, 1], dtype='int64', lod_level=1)
# Learn the CRF transition features
crf_cost = fluid.layers.linear_chain_crf(
......
@@ -294,42 +294,42 @@ Defines the format of the model input features, including the sentence sequence,
```python
# Sentence sequences
word = fluid.layers.data(
name='word_data', shape=[1], dtype='int64', lod_level=1)
word = fluid.data(
name='word_data', shape=[None, 1], dtype='int64', lod_level=1)
# predicate
predicate = fluid.layers.data(
name='verb_data', shape=[1], dtype='int64', lod_level=1)
predicate = fluid.data(
name='verb_data', shape=[None, 1], dtype='int64', lod_level=1)
# 5 features of the predicate context
ctx_n2 = fluid.layers.data(
name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n1 = fluid.layers.data(
name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
ctx_0 = fluid.layers.data(
name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
ctx_p1 = fluid.layers.data(
name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
ctx_p2 = fluid.layers.data(
name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n2 = fluid.data(
name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_n1 = fluid.data(
name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_0 = fluid.data(
name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_p1 = fluid.data(
name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_p2 = fluid.data(
name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1)
# Predicate context region marker
mark = fluid.layers.data(
name='mark_data', shape=[1], dtype='int64', lod_level=1)
mark = fluid.data(
name='mark_data', shape=[None, 1], dtype='int64', lod_level=1)
```
### Defining the network structure
First, pre-train and define the model's input layer.
```python
# Pre-train the predicate and the predicate context region marker
predicate_embedding = fluid.layers.embedding(
predicate_embedding = fluid.embedding(
input=predicate,
size=[pred_dict_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr='vemb')
mark_embedding = fluid.layers.embedding(
mark_embedding = fluid.embedding(
input=mark,
size=[mark_dict_len, mark_dim],
dtype='float32',
@@ -340,7 +340,7 @@ word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
# Since the word vectors are pre-trained, the embedding table is not trained again here;
# setting the parameter attribute trainable to False prevents the table from being updated during training
emb_layers = [
fluid.layers.embedding(
fluid.embedding(
size=[word_dict_len, word_dim],
input=x,
param_attr=fluid.ParamAttr(
@@ -398,7 +398,7 @@ feature_out = fluid.layers.sums(input=[
])
# tag/label sequence
target = fluid.layers.data(
target = fluid.data(
name='target', shape=[1], dtype='int64', lod_level=1)
# Learning CRF transfer features
......
@@ -53,14 +53,14 @@ def load_parameter(file_name, h, w):
def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
**ignored):
# 8 features
predicate_embedding = fluid.layers.embedding(
predicate_embedding = fluid.embedding(
input=predicate,
size=[pred_dict_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr='vemb')
mark_embedding = fluid.layers.embedding(
mark_embedding = fluid.embedding(
input=mark,
size=[mark_dict_len, mark_dim],
dtype='float32',
@@ -68,7 +68,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
fluid.layers.embedding(
fluid.embedding(
size=[word_dict_len, word_dim],
input=x,
param_attr=fluid.ParamAttr(name=embedding_name, trainable=False))
@@ -120,22 +120,22 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
def train(use_cuda, save_dirname=None, is_local=True):
# define data layers
word = fluid.layers.data(
name='word_data', shape=[1], dtype='int64', lod_level=1)
predicate = fluid.layers.data(
name='verb_data', shape=[1], dtype='int64', lod_level=1)
ctx_n2 = fluid.layers.data(
name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n1 = fluid.layers.data(
name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
ctx_0 = fluid.layers.data(
name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
ctx_p1 = fluid.layers.data(
name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
ctx_p2 = fluid.layers.data(
name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
mark = fluid.layers.data(
name='mark_data', shape=[1], dtype='int64', lod_level=1)
word = fluid.data(
name='word_data', shape=[None, 1], dtype='int64', lod_level=1)
predicate = fluid.data(
name='verb_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_n2 = fluid.data(
name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_n1 = fluid.data(
name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_0 = fluid.data(
name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_p1 = fluid.data(
name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1)
ctx_p2 = fluid.data(
name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1)
mark = fluid.data(
name='mark_data', shape=[None, 1], dtype='int64', lod_level=1)
if args.enable_ce:
fluid.default_startup_program().random_seed = 90
......
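One last pattern worth spelling out from the SRL script above: the context-word lookups reuse a pre-trained embedding table and freeze it. A hedged sketch (the parameter name `emb` stands in for `embedding_name`, and the sizes are illustrative):

```python
import paddle.fluid as fluid

word_dict_len, word_dim = 44068, 32  # illustrative sizes

word = fluid.data(name='word_data', shape=[None, 1],
                  dtype='int64', lod_level=1)

# trainable=False keeps the pre-trained table fixed during training;
# the script's load_parameter(...) copies the w2v weights into it.
word_embedding = fluid.embedding(
    input=word,
    size=[word_dict_len, word_dim],
    dtype='float32',
    param_attr=fluid.ParamAttr(name='emb', trainable=False))
```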