提交 92185207 编写于 作者: D dangqingqing

generate html

上级 ec6cee60
...@@ -266,6 +266,8 @@ We trained in the English Wikipedia language model to get a word vector lookup t ...@@ -266,6 +266,8 @@ We trained in the English Wikipedia language model to get a word vector lookup t
Get dictionary, print dictionary size: Get dictionary, print dictionary size:
```python ```python
import math
import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05 import paddle.v2.dataset.conll05 as conll05
...@@ -274,164 +276,164 @@ word_dict_len = len(word_dict) ...@@ -274,164 +276,164 @@ word_dict_len = len(word_dict)
label_dict_len = len(label_dict) label_dict_len = len(label_dict)
pred_len = len(verb_dict) pred_len = len(verb_dict)
print len(word_dict_len) print word_dict_len
print len(label_dict_len) print label_dict_len
print len(pred_len) print pred_len
``` ```
## Model configuration ## Model configuration
1. Define input data dimensions and model hyperparameters. - 1. Define input data dimensions and model hyperparameters.
```python ```python
mark_dict_len = 2 # Value range of region mark. Region mark is either 0 or 1, so range is 2 mark_dict_len = 2 # Value range of region mark. Region mark is either 0 or 1, so range is 2
word_dim = 32 # word vector dimension word_dim = 32 # word vector dimension
mark_dim = 5 # adjacent dimension mark_dim = 5 # adjacent dimension
hidden_dim = 512 # the dimension of LSTM hidden layer vector is 128 (512/4) hidden_dim = 512 # the dimension of LSTM hidden layer vector is 128 (512/4)
depth = 8 # depth of stacked LSTM depth = 8 # depth of stacked LSTM
# There are 9 features per sample, so we will define 9 data layers. # There are 9 features per sample, so we will define 9 data layers.
# The type of each layer is integer_value_sequence. # The type of each layer is integer_value_sequence.
def d_type(value_range): def d_type(value_range):
return paddle.data_type.integer_value_sequence(value_range) return paddle.data_type.integer_value_sequence(value_range)
# word sequence # word sequence
word = paddle.layer.data(name='word_data', type=d_type(word_dict_len)) word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
# predicate # predicate
predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len)) predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
# 5 features for predicate context # 5 features for predicate context
ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len)) ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len)) ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len)) ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len)) ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len)) ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
# region marker sequence # region marker sequence
mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len)) mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
# label sequence # label sequence
target = paddle.layer.data(name='target', type=d_type(label_dict_len)) target = paddle.layer.data(name='target', type=d_type(label_dict_len))
``` ```
Special note: hidden_dim = 512 means the LSTM hidden vector has 128 dimensions (512/4). Please refer to the official PaddlePaddle documentation for details: [lstmemory](http://www.paddlepaddle.org/doc/ui/api/trainer_config_helpers/layers.html#lstmemory). Special note: hidden_dim = 512 means the LSTM hidden vector has 128 dimensions (512/4). Please refer to the official PaddlePaddle documentation for details: [lstmemory](http://www.paddlepaddle.org/doc/ui/api/trainer_config_helpers/layers.html#lstmemory).
- 2. The word sequence, predicate, predicate context, and region mark sequence are transformed into embedding vector sequences.
```python
# Since the word vector lookup table is pre-trained, we won't update it this time.
# is_static being True prevents updating the lookup table during training.
emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True)
# hyperparameter configurations
default_std = 1 / math.sqrt(hidden_dim) / 3.0
std_default = paddle.attr.Param(initial_std=default_std)
std_0 = paddle.attr.Param(initial_std=0.)
predicate_embedding = paddle.layer.embedding(
size=word_dim,
input=predicate,
param_attr=paddle.attr.Param(
name='vemb', initial_std=default_std))
mark_embedding = paddle.layer.embedding(
size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
paddle.layer.embedding(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
```
2. The word sequence, predicate, predicate context, and region mark sequence are transformed into embedding vector sequences. - 3. 8 LSTM units will be trained in "forward / backward" order.
```python ```python
hidden_0 = paddle.layer.mixed(
# Since the word vector lookup table is pre-trained, we won't update it this time. size=hidden_dim,
# is_static being True prevents updating the lookup table during training. bias_attr=std_default,
emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True) input=[
# hyperparameter configurations paddle.layer.full_matrix_projection(
default_std = 1 / math.sqrt(hidden_dim) / 3.0 input=emb, param_attr=std_default) for emb in emb_layers
std_default = paddle.attr.Param(initial_std=default_std) ])
std_0 = paddle.attr.Param(initial_std=0.)
mix_hidden_lr = 1e-3
predicate_embedding = paddle.layer.embedding( lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
size=word_dim, hidden_para_attr = paddle.attr.Param(
input=predicate, initial_std=default_std, learning_rate=mix_hidden_lr)
param_attr=paddle.attr.Param(
name='vemb', initial_std=default_std)) lstm_0 = paddle.layer.lstmemory(
mark_embedding = paddle.layer.embedding( input=hidden_0,
size=mark_dim, input=mark, param_attr=std_0) act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] state_act=paddle.activation.Sigmoid(),
emb_layers = [ bias_attr=std_0,
paddle.layer.embedding( param_attr=lstm_para_attr)
size=word_dim, input=x, param_attr=emb_para) for x in word_input
] # stack L-LSTM and R-LSTM with direct edges
emb_layers.append(predicate_embedding) input_tmp = [hidden_0, lstm_0]
emb_layers.append(mark_embedding)
``` for i in range(1, depth):
mix_hidden = paddle.layer.mixed(
3. 8 LSTM units will be trained in "forward / backward" order.
```python
hidden_0 = paddle.layer.mixed(
size=hidden_dim, size=hidden_dim,
bias_attr=std_default, bias_attr=std_default,
input=[ input=[
paddle.layer.full_matrix_projection( paddle.layer.full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
]) ])
mix_hidden_lr = 1e-3 lstm = paddle.layer.lstmemory(
lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0) input=mix_hidden,
hidden_para_attr = paddle.attr.Param(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = paddle.layer.lstmemory(
input=hidden_0,
act=paddle.activation.Relu(), act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(), gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(), state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1),
bias_attr=std_0, bias_attr=std_0,
param_attr=lstm_para_attr) param_attr=lstm_para_attr)
# stack L-LSTM and R-LSTM with direct edges input_tmp = [mix_hidden, lstm]
input_tmp = [hidden_0, lstm_0] ```
for i in range(1, depth): - 4. We will concatenate the output of the top LSTM unit with its input, and project it into a hidden layer. Then put a fully connected layer on top of it to get the final vector representation.
mix_hidden = paddle.layer.mixed(
size=hidden_dim, ```python
bias_attr=std_default, feature_out = paddle.layer.mixed(
input=[ size=label_dict_len,
paddle.layer.full_matrix_projection( bias_attr=std_default,
input=input_tmp[0], param_attr=hidden_para_attr), input=[
paddle.layer.full_matrix_projection( paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr) input=input_tmp[0], param_attr=hidden_para_attr),
]) paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
lstm = paddle.layer.lstmemory( ], )
input=mix_hidden, ```
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(), - 5. We use CRF as cost function, the parameter of CRF cost will be named `crfw`.
state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1), ```python
bias_attr=std_0, crf_cost = paddle.layer.crf(
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
```
4. We will concatenate the output of the top LSTM unit with its input, and project it into a hidden layer. Then put a fully connected layer on top of it to get the final vector representation.
```python
feature_out = paddle.layer.mixed(
size=label_dict_len, size=label_dict_len,
bias_attr=std_default, input=feature_out,
input=[ label=target,
paddle.layer.full_matrix_projection( param_attr=paddle.attr.Param(
input=input_tmp[0], param_attr=hidden_para_attr), name='crfw',
paddle.layer.full_matrix_projection( initial_std=default_std,
input=input_tmp[1], param_attr=lstm_para_attr) learning_rate=mix_hidden_lr))
], ) ```
```
- 6. CRF decoding layer is used for evaluation and inference. It shares parameter with CRF layer. The sharing of parameters among multiple layers is specified by the same parameter name in these layers.
5. We use CRF as cost function, the parameter of CRF cost will be named `crfw`.
```python
```python crf_dec = paddle.layer.crf_decoding(
crf_cost = paddle.layer.crf( name='crf_dec_l',
size=label_dict_len, size=label_dict_len,
input=feature_out, input=feature_out,
label=target, label=target,
param_attr=paddle.attr.Param( param_attr=paddle.attr.Param(name='crfw'))
name='crfw', ```
initial_std=default_std,
learning_rate=mix_hidden_lr))
```
6. CRF decoding layer is used for evaluation and inference. It shares parameter with CRF layer. The sharing of parameters among multiple layers is specified by the same parameter name in these layers.
```python
crf_dec = paddle.layer.crf_decoding(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(name='crfw'))
```
## Train model ## Train model
...@@ -454,8 +456,8 @@ Now we load pre-trained word lookup table. ...@@ -454,8 +456,8 @@ Now we load pre-trained word lookup table.
```python ```python
def load_parameter(file_name, h, w): def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f: with open(file_name, 'rb') as f:
f.read(16) f.read(16)
return np.fromfile(f, dtype=np.float32).reshape(h, w) return np.fromfile(f, dtype=np.float32).reshape(h, w)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32)) parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
``` ```
......
...@@ -228,171 +228,175 @@ conll05st-release/ ...@@ -228,171 +228,175 @@ conll05st-release/
获取词典,打印词典大小: 获取词典,打印词典大小:
```python ```python
import math
import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05 import paddle.v2.dataset.conll05 as conll05
paddle.init(use_gpu=False, trainer_count=1)
word_dict, verb_dict, label_dict = conll05.get_dict() word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict) word_dict_len = len(word_dict)
label_dict_len = len(label_dict) label_dict_len = len(label_dict)
pred_len = len(verb_dict) pred_len = len(verb_dict)
print len(word_dict_len) print word_dict_len
print len(label_dict_len) print label_dict_len
print len(pred_len) print pred_len
``` ```
## 模型配置说明 ## 模型配置说明
1. 定义输入数据维度及模型超参数。 - 1. 定义输入数据维度及模型超参数。
```python ```python
mark_dict_len = 2 # 谓词上下文区域标志的维度,是一个0-1 2值特征,因此维度为2 mark_dict_len = 2 # 谓词上下文区域标志的维度,是一个0-1 2值特征,因此维度为2
word_dim = 32 # 词向量维度 word_dim = 32 # 词向量维度
mark_dim = 5 # 谓词上下文区域通过词表被映射为一个实向量,这个是相邻的维度 mark_dim = 5 # 谓词上下文区域通过词表被映射为一个实向量,这个是相邻的维度
hidden_dim = 512 # LSTM隐层向量的维度 : 512 / 4 hidden_dim = 512 # LSTM隐层向量的维度 : 512 / 4
depth = 8 # 栈式LSTM的深度 depth = 8 # 栈式LSTM的深度
# 一条样本总共9个特征,下面定义了9个data层,每个层类型为integer_value_sequence,表示整数ID的序列类型.
def d_type(size):
return paddle.data_type.integer_value_sequence(size)
# 句子序列
word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
# 谓词
predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
# 谓词上下文5个特征
ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
# 谓词上下区域标志
mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
# 标注序列 # 一条样本总共9个特征,下面定义了9个data层,每个层类型为integer_value_sequence,表示整数ID的序列类型.
target = paddle.layer.data(name='target', type=d_type(label_dict_len)) def d_type(size):
``` return paddle.data_type.integer_value_sequence(size)
# 句子序列
word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
# 谓词
predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
# 谓词上下文5个特征
ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
# 谓词上下区域标志
mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
# 标注序列
target = paddle.layer.data(name='target', type=d_type(label_dict_len))
```
这里需要特别说明的是hidden_dim = 512指定了LSTM隐层向量的维度为128维,关于这一点请参考PaddlePaddle官方文档中[lstmemory](http://www.paddlepaddle.org/doc/ui/api/trainer_config_helpers/layers.html#lstmemory)的说明。 这里需要特别说明的是hidden_dim = 512指定了LSTM隐层向量的维度为128维,关于这一点请参考PaddlePaddle官方文档中[lstmemory](http://www.paddlepaddle.org/doc/ui/api/trainer_config_helpers/layers.html#lstmemory)的说明。
2. 将句子序列、谓词、谓词上下文、谓词上下文区域标记通过词表,转换为实向量表示的词向量序列。 - 2. 将句子序列、谓词、谓词上下文、谓词上下文区域标记通过词表,转换为实向量表示的词向量序列。
```python
# 在本教程中,我们加载了预训练的词向量,这里设置了:is_static=True
# is_static 为 True 时保证了在训练 SRL 模型过程中,词表不再更新
emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True)
# 设置超参数
default_std = 1 / math.sqrt(hidden_dim) / 3.0
std_default = paddle.attr.Param(initial_std=default_std)
std_0 = paddle.attr.Param(initial_std=0.)
predicate_embedding = paddle.layer.embedding(
size=word_dim,
input=predicate,
param_attr=paddle.attr.Param(
name='vemb', initial_std=default_std))
mark_embedding = paddle.layer.embedding(
size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
paddle.layer.embedding(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
```
```python - 3. 8个LSTM单元以“正向/反向”的顺序对所有输入序列进行学习。
# 在本教程中,我们加载了预训练的词向量,这里设置了:is_static=True ```python
# is_static 为 True 时保证了在训练 SRL 模型过程中,词表不再更新 hidden_0 = paddle.layer.mixed(
emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True) size=hidden_dim,
# 设置超参数 bias_attr=std_default,
default_std = 1 / math.sqrt(hidden_dim) / 3.0 input=[
std_default = paddle.attr.Param(initial_std=default_std) paddle.layer.full_matrix_projection(
std_0 = paddle.attr.Param(initial_std=0.) input=emb, param_attr=std_default) for emb in emb_layers
])
predicate_embedding = paddle.layer.embedding(
size=word_dim, mix_hidden_lr = 1e-3
input=predicate, lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
param_attr=paddle.attr.Param( hidden_para_attr = paddle.attr.Param(
name='vemb', initial_std=default_std)) initial_std=default_std, learning_rate=mix_hidden_lr)
mark_embedding = paddle.layer.embedding(
size=mark_dim, input=mark, param_attr=std_0) lstm_0 = paddle.layer.lstmemory(
input=hidden_0,
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] act=paddle.activation.Relu(),
emb_layers = [ gate_act=paddle.activation.Sigmoid(),
paddle.layer.embedding( state_act=paddle.activation.Sigmoid(),
size=word_dim, input=x, param_attr=emb_para) for x in word_input bias_attr=std_0,
] param_attr=lstm_para_attr)
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding) #stack L-LSTM and R-LSTM with direct edges
``` input_tmp = [hidden_0, lstm_0]
3. 8个LSTM单元以“正向/反向”的顺序对所有输入序列进行学习。 for i in range(1, depth):
mix_hidden = paddle.layer.mixed(
```python
hidden_0 = paddle.layer.mixed(
size=hidden_dim, size=hidden_dim,
bias_attr=std_default, bias_attr=std_default,
input=[ input=[
paddle.layer.full_matrix_projection( paddle.layer.full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
]) ])
mix_hidden_lr = 1e-3 lstm = paddle.layer.lstmemory(
lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0) input=mix_hidden,
hidden_para_attr = paddle.attr.Param(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = paddle.layer.lstmemory(
input=hidden_0,
act=paddle.activation.Relu(), act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(), gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(), state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1),
bias_attr=std_0, bias_attr=std_0,
param_attr=lstm_para_attr) param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges input_tmp = [mix_hidden, lstm]
input_tmp = [hidden_0, lstm_0] ```
for i in range(1, depth): - 4. 取最后一个栈式LSTM的输出和这个LSTM单元的输入到隐层映射,经过一个全连接层映射到标记字典的维度,得到最终的特征向量表示。
mix_hidden = paddle.layer.mixed(
size=hidden_dim, ```python
bias_attr=std_default, feature_out = paddle.layer.mixed(
input=[ size=label_dict_len,
paddle.layer.full_matrix_projection( bias_attr=std_default,
input=input_tmp[0], param_attr=hidden_para_attr), input=[
paddle.layer.full_matrix_projection( paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr) input=input_tmp[0], param_attr=hidden_para_attr),
]) paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
lstm = paddle.layer.lstmemory( ], )
input=mix_hidden, ```
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(), - 5. 网络的末端定义CRF层计算损失(cost),指定参数名字为 `crfw`,该层需要输入正确的数据标签(target)。
state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1), ```python
bias_attr=std_0, crf_cost = paddle.layer.crf(
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
```
4. 取最后一个栈式LSTM的输出和这个LSTM单元的输入到隐层映射,经过一个全连接层映射到标记字典的维度,得到最终的特征向量表示。
```python
feature_out = paddle.layer.mixed(
size=label_dict_len, size=label_dict_len,
bias_attr=std_default, input=feature_out,
input=[ label=target,
paddle.layer.full_matrix_projection( param_attr=paddle.attr.Param(
input=input_tmp[0], param_attr=hidden_para_attr), name='crfw',
paddle.layer.full_matrix_projection( initial_std=default_std,
input=input_tmp[1], param_attr=lstm_para_attr) learning_rate=mix_hidden_lr))
], ) ```
```
- 6. CRF译码层和CRF层参数名字相同,即共享权重。如果输入了正确的数据标签(target),会统计错误标签的个数,可以用来评估模型。如果没有输入正确的数据标签,该层可以推导出最优解,可以用来预测模型。
5. 网络的末端定义CRF层计算损失(cost),指定参数名字为 `crfw`,该层需要输入正确的数据标签(target)。
```python
```python crf_dec = paddle.layer.crf_decoding(
crf_cost = paddle.layer.crf( name='crf_dec_l',
size=label_dict_len, size=label_dict_len,
input=feature_out, input=feature_out,
label=target, label=target,
param_attr=paddle.attr.Param( param_attr=paddle.attr.Param(name='crfw'))
name='crfw', ```
initial_std=default_std,
learning_rate=mix_hidden_lr))
```
6. CRF译码层和CRF层参数名字相同,即共享权重。如果输入了正确的数据标签(target),会统计错误标签的个数,可以用来评估模型。如果没有输入正确的数据标签,该层可以推导出最优解,可以用来预测模型。
```python
crf_dec = paddle.layer.crf_decoding(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(name='crfw'))
```
## 训练模型 ## 训练模型
...@@ -417,8 +421,8 @@ print parameters.keys() ...@@ -417,8 +421,8 @@ print parameters.keys()
# 这里加载PaddlePaddle上版保存的二进制模型 # 这里加载PaddlePaddle上版保存的二进制模型
def load_parameter(file_name, h, w): def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f: with open(file_name, 'rb') as f:
f.read(16) f.read(16)
return np.fromfile(f, dtype=np.float32).reshape(h, w) return np.fromfile(f, dtype=np.float32).reshape(h, w)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32)) parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
``` ```
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册