Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
9ccc94f4
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9ccc94f4
编写于
2月 27, 2017
作者:
D
dangqingqing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
srl api training
上级
d425a5ca
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
215 additions
and
0 deletions
+215
-0
demo/semantic_role_labeling/api_train_v2.py
demo/semantic_role_labeling/api_train_v2.py
+112
-0
demo/semantic_role_labeling/model_v2.py
demo/semantic_role_labeling/model_v2.py
+103
-0
未找到文件。
demo/semantic_role_labeling/api_train_v2.py
0 → 100644
浏览文件 @
9ccc94f4
import numpy
import paddle.v2 as paddle
from paddle.trainer_config_helpers.attrs import ParamAttr
from model_v2 import db_lstm

# Vocabulary files: one entry per line; the line number is the entry's index.
word_dict_file = './data/wordDict.txt'
label_dict_file = './data/targetDict.txt'
predicate_file = './data/verbDict.txt'


def _load_dict(path):
    """Return a dict mapping each stripped line of *path* to its 0-based line number.

    If the same entry appears on several lines, the last line number wins.
    """
    with open(path, 'r') as handle:
        return {line.strip(): idx for idx, line in enumerate(handle)}


word_dict = _load_dict(word_dict_file)
label_dict = _load_dict(label_dict_file)
predicate_dict = _load_dict(predicate_file)

# Sizes of the three vocabularies, used to dimension the network's data layers.
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(predicate_dict)
def train_reader(file_name="data/feature", unk_idx=0):
    """Create a reader over a tab-separated semantic-role-labeling feature file.

    Every non-blank line of *file_name* must contain nine tab-separated
    fields, in this order: sentence, predicate, ctx_n2, ctx_n1, ctx_0,
    ctx_p1, ctx_p2, mark, label.  ``sentence``, ``mark`` and ``label`` are
    whitespace-separated sequences; the other fields are single tokens that
    are repeated once per word of the sentence.

    The module-level ``word_dict``, ``predicate_dict`` and ``label_dict`` are
    used to turn tokens into integer ids.

    Args:
        file_name: Path of the feature file to read.
        unk_idx: Id substituted for words that are missing from
            ``word_dict``.  Defaults to 0 (assumes the first vocabulary line
            is the unknown-word token -- TODO confirm against wordDict.txt).

    Returns:
        A zero-argument callable.  Each call re-opens the file and yields,
        per line, a 9-tuple of lists: (word, ctx_n2, ctx_n1, ctx_0, ctx_p1,
        ctx_p2, predicate, mark, label).
    """

    def reader():
        with open(file_name, 'r') as fdata:
            for line in fdata:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at EOF)
                    # instead of failing the 9-way unpack below.
                    continue

                (sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
                 mark, label) = line.split('\t')

                words = sentence.split()
                sen_len = len(words)

                def repeat_word_id(token):
                    # Single-token features are broadcast to sentence length.
                    return [word_dict.get(token, unk_idx)] * sen_len

                word_slot = [word_dict.get(w, unk_idx) for w in words]
                # NOTE(review): an unknown predicate or label maps to None
                # here (no fallback id) -- confirm the data never hits this.
                predicate_slot = [predicate_dict.get(predicate)] * sen_len
                ctx_n2_slot = repeat_word_id(ctx_n2)
                ctx_n1_slot = repeat_word_id(ctx_n1)
                ctx_0_slot = repeat_word_id(ctx_0)
                ctx_p1_slot = repeat_word_id(ctx_p1)
                ctx_p2_slot = repeat_word_id(ctx_p2)

                mark_slot = [int(m) for m in mark.split()]
                label_slot = [label_dict.get(tag) for tag in label.split()]

                yield word_slot, ctx_n2_slot, ctx_n1_slot, \
                    ctx_0_slot, ctx_p1_slot, ctx_p2_slot, \
                    predicate_slot, mark_slot, label_slot

    return reader
def main():
    """Build the SRL network with a CRF cost/decoder on top and train it with SGD.

    Uses the module-level vocabulary sizes (``word_dict_len``,
    ``label_dict_len``, ``pred_len``) to dimension the network and
    ``train_reader`` as the data source.  Prints the cost after every
    training iteration.
    """
    import math  # function-scope import: only needed for the CRF init below

    paddle.init(use_gpu=False, trainer_count=1)

    # These mirror the values db_lstm() uses internally (hidden_dim = 512,
    # default_std = 1 / sqrt(hidden_dim) / 3, mix_hidden_lr = 1e-3) so the
    # CRF weights are initialised like the rest of the network.
    hidden_dim = 512
    default_std = 1 / math.sqrt(hidden_dim) / 3.0
    mix_hidden_lr = 1e-3

    # define network topology
    # The label size comes from the loaded label dictionary so that the
    # feature layer, the target layer and the CRF all agree.
    output = db_lstm(word_dict_len, label_dict_len, pred_len)
    target = paddle.layer.data(name='target', size=label_dict_len)
    crf_cost = paddle.layer.crf(
        size=label_dict_len,
        input=output,
        label=target,
        param_attr=paddle.attr.Param(
            name='crfw',
            initial_std=default_std,
            learning_rate=mix_hidden_lr))

    # The decoder shares the 'crfw' transition weights with the cost layer.
    crf_dec = paddle.layer.crf_decoding(
        name='crf_dec_l',
        size=label_dict_len,
        input=output,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))

    topo = [crf_cost, crf_dec]
    parameters = paddle.parameters.create(topo)
    optimizer = paddle.optimizer.Momentum(momentum=0.01, learning_rate=2e-2)

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            # NOTE(review): '___fc_2__.w0' looks copied from another demo;
            # this topology declares no fc layer -- confirm the name exists.
            para = parameters.get('___fc_2__.w0')
            print("Pass %d, Batch %d, Cost %f, Param mean %f" %
                  (event.pass_id, event.batch_id, event.cost, para.mean()))

    trainer = paddle.trainer.SGD(update_equation=optimizer)

    # Position of each data layer's input inside the tuples yielded by
    # train_reader(): same order as its ``yield`` statement.
    reader_dict = {
        'word_data': 0,
        'ctx_n2_data': 1,
        'ctx_n1_data': 2,
        'ctx_0_data': 3,
        'ctx_p1_data': 4,
        'ctx_p2_data': 5,
        'verb_data': 6,
        'mark_data': 7,
        'target': 8,
    }

    trainer.train(
        # train_reader is a factory; the trainer needs the reader it returns.
        train_data_reader=train_reader(),
        batch_size=32,
        topology=topo,
        parameters=parameters,
        event_handler=event_handler,
        num_passes=10000,
        # NOTE(review): data_types is still empty; it presumably needs one
        # (layer name, input type) entry per data layer -- TODO confirm
        # against the trainer API in use.
        data_types=[],
        reader_dict=reader_dict)


if __name__ == '__main__':
    main()
demo/semantic_role_labeling/model_v2.py
0 → 100644
浏览文件 @
9ccc94f4
import
paddle.v2
as
paddle
def db_lstm(word_dict_len, label_dict_len, pred_len):
    """Build the stacked, direction-alternating LSTM feature network for SRL.

    Eight input features (the word, five context words, the predicate and
    the predicate-region mark) are embedded, mixed into a hidden layer and
    passed through ``depth`` LSTM layers.  Each layer's input is a mix of
    the previous mixed layer and the previous LSTM output.

    Args:
        word_dict_len: Size passed to the word and context-word data layers.
        label_dict_len: Size of the returned feature layer.
        pred_len: Size passed to the predicate ('verb_data') data layer.

    Returns:
        A mixed layer of size ``label_dict_len`` combining the last mixed
        layer and the last LSTM layer.
    """
    import math  # function-scope import: this module only imports paddle

    mark_dict_len = 2
    word_dim = 32
    mark_dim = 5
    hidden_dim = 512
    depth = 8

    # 8 features
    word = paddle.layer.data(name='word_data', size=word_dict_len)
    predicate = paddle.layer.data(name='verb_data', size=pred_len)

    ctx_n2 = paddle.layer.data(name='ctx_n2_data', size=word_dict_len)
    ctx_n1 = paddle.layer.data(name='ctx_n1_data', size=word_dict_len)
    ctx_0 = paddle.layer.data(name='ctx_0_data', size=word_dict_len)
    ctx_p1 = paddle.layer.data(name='ctx_p1_data', size=word_dict_len)
    ctx_p2 = paddle.layer.data(name='ctx_p2_data', size=word_dict_len)
    mark = paddle.layer.data(name='mark_data', size=mark_dict_len)

    default_std = 1 / math.sqrt(hidden_dim) / 3.0

    # All six word-like inputs share the single 'emb' table.  With
    # learning_rate=0 it is not updated by training (presumably it is meant
    # to be filled from pre-trained vectors -- TODO confirm).
    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)

    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(
            name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        name='word_ctx-in_embedding',
        size=mark_dim,
        input=mark,
        param_attr=std_0)

    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        paddle.layer.embedding(
            size=word_dim, input=x, param_attr=emb_para) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0 = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=std_default) for emb in emb_layers
        ])

    mix_hidden_lr = 1e-3
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)

    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=std_0,
        param_attr=lstm_para_attr)

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=std_default,
            input=[
                paddle.layer.full_matrix_projection(
                    input=input_tmp[0], param_attr=hidden_para_attr),
                paddle.layer.full_matrix_projection(
                    input=input_tmp[1], param_attr=lstm_para_attr)
            ])

        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            # Odd-numbered layers run over the sequence in reverse.
            reverse=((i % 2) == 1),
            bias_attr=std_0,
            param_attr=lstm_para_attr)

        input_tmp = [mix_hidden, lstm]

    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=input_tmp[0], param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=input_tmp[1], param_attr=lstm_para_attr)
        ], )

    return feature_out
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录