Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
431f46fb
M
models
项目概览
PaddlePaddle
/
models
1 年多 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
431f46fb
编写于
8月 29, 2017
作者:
C
caoying03
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix infer.
上级
f0a11911
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
285 addition
and
78 deletion
+285
-78
globally_normalized_reader/basic_modules.py
globally_normalized_reader/basic_modules.py
+0
-1
globally_normalized_reader/beam_decoding.py
globally_normalized_reader/beam_decoding.py
+206
-0
globally_normalized_reader/config.py
globally_normalized_reader/config.py
+6
-3
globally_normalized_reader/infer.py
globally_normalized_reader/infer.py
+35
-55
globally_normalized_reader/model.py
globally_normalized_reader/model.py
+26
-14
globally_normalized_reader/train.py
globally_normalized_reader/train.py
+12
-5
未找到文件。
globally_normalized_reader/basic_modules.py
浏览文件 @
431f46fb
#!/usr/bin/env python
#coding=utf-8
import
pdb
import
collections
import
paddle.v2
as
paddle
...
...
globally_normalized_reader/beam_decoding.py
0 → 100644
浏览文件 @
431f46fb
#!/usr/bin/env python
#coding=utf-8
import
pdb
import
numpy
as
np
__all__
=
[
"BeamDecoding"
]
class BeamDecoding(object):
    """Decode final answer spans from the three chained beam-search outputs
    (sentence selection -> answer-start selection -> answer-end selection)
    produced by the globally normalized reader network.

    The network emits flat score arrays plus (rows, beam_size) index matrices
    for each of the three beams; -1 in an index matrix marks an unused beam
    slot.  This class rebuilds the sequence-offset bookkeeping for every beam
    and walks the parent pointers backwards to assemble complete
    (sentence, start, end) answers, sorted by accumulated score per sample.
    """

    def __init__(self, documents, sentence_scores, selected_sentences,
                 start_scores, selected_starts, end_scores, selected_ends):
        """
        Args:
            documents: nested list, one list of sentences (each a list of
                word ids) per sample in the batch.
            sentence_scores: flat score array, one score per sentence over the
                whole batch.  # assumes numpy array — TODO confirm dtype/shape
            selected_sentences: (num_samples, beam_size) indices of sentences
                chosen by beam 1; -1 marks an empty slot.
            start_scores: flat score array over all words of all selected
                sentences.
            selected_starts: (rows, beam_size) start positions chosen by
                beam 2; -1 marks an empty slot.
            end_scores: flat score array over all candidate end positions.
            selected_ends: (rows, beam_size) end positions chosen by beam 3;
                -1 marks an empty slot.
        """
        self.documents = documents
        self.sentence_scores = sentence_scores
        self.selected_sentences = selected_sentences
        self.start_scores = start_scores
        self.selected_starts = selected_starts
        self.end_scores = end_scores
        self.selected_ends = selected_ends

        # Sequence start-position information for the three-step search.
        # beam1 searches the sentence index.
        self.beam1_seq_start_positions = []
        # beam2 searches the answer-start position.
        self.beam2_seq_start_positions = []
        # beam3 searches the answer-end position.
        self.beam3_seq_start_positions = []

        # Prefix sums: number of answers accumulated per sample, starts at 0.
        self.ans_per_sample_in_a_batch = [0]
        # All partially/fully decoded answers across the batch, in beam order.
        self.all_searched_ans = []
        # Per-sample decoded answers, filled by decoding().
        self.final_ans = [[] for _ in range(len(documents))]

    def _build_beam1_seq_info(self):
        """Build cumulative word offsets of each sentence, per document."""
        self.beam1_seq_start_positions.append([0])
        for idx, one_doc in enumerate(self.documents):
            for sentence in one_doc:
                self.beam1_seq_start_positions[-1].append(
                    self.beam1_seq_start_positions[-1][-1] + len(sentence))
            # Start the next document's offset list (skipped after the last).
            if len(self.beam1_seq_start_positions) != len(self.documents):
                self.beam1_seq_start_positions.append(
                    [self.beam1_seq_start_positions[-1][-1]])

    def _build_beam2_seq_info(self):
        """Build cumulative word offsets of the sentences kept by beam 1."""
        seq_num, beam_size = self.selected_sentences.shape
        self.beam2_seq_start_positions.append([0])
        for i in range(seq_num):
            for j in range(beam_size):
                selected_id = int(self.selected_sentences[i][j])
                if selected_id == -1:
                    # -1 marks the end of the valid beam entries for row i.
                    break
                seq_len = (self.beam1_seq_start_positions[i][selected_id + 1] -
                           self.beam1_seq_start_positions[i][selected_id])
                self.beam2_seq_start_positions[-1].append(
                    self.beam2_seq_start_positions[-1][-1] + seq_len)
            if len(self.beam2_seq_start_positions) != seq_num:
                self.beam2_seq_start_positions.append(
                    [self.beam2_seq_start_positions[-1][-1]])

    def _build_beam3_seq_info(self):
        """Build cumulative offsets of candidate end spans kept by beam 2."""
        seq_num_in_a_batch = len(self.documents)

        seq_id = 0
        sub_seq_id = 0
        sub_seq_count = len(self.beam2_seq_start_positions[seq_id]) - 1

        self.beam3_seq_start_positions.append([0])
        sub_seq_num, beam_size = self.selected_starts.shape
        for i in range(sub_seq_num):
            seq_len = (self.beam2_seq_start_positions[seq_id][sub_seq_id + 1] -
                       self.beam2_seq_start_positions[seq_id][sub_seq_id])
            for j in range(beam_size):
                start_id = int(self.selected_starts[i][j])
                if start_id == -1:
                    break
                # A start at position start_id leaves (seq_len - start_id)
                # candidate end positions.
                self.beam3_seq_start_positions[-1].append(
                    self.beam3_seq_start_positions[-1][-1] + seq_len - start_id)
            sub_seq_id += 1
            if sub_seq_id == sub_seq_count:
                # Advance to the next sample unless this was the last one.
                if len(self.beam3_seq_start_positions) != seq_num_in_a_batch:
                    self.beam3_seq_start_positions.append(
                        [self.beam3_seq_start_positions[-1][-1]])
                    sub_seq_id = 0
                    seq_id += 1
                    sub_seq_count = len(
                        self.beam2_seq_start_positions[seq_id]) - 1
        assert (self.beam3_seq_start_positions[-1][-1] ==
                self.end_scores.shape[0])

    def _build_seq_info_for_each_beam(self):
        """Build offset bookkeeping for all three beams, in order."""
        self._build_beam1_seq_info()
        self._build_beam2_seq_info()
        self._build_beam3_seq_info()

    def _cal_ans_per_sample_in_a_batch(self):
        """Count valid answers per sample as prefix sums over selected_ends."""
        start_row = 0
        for seq in self.beam3_seq_start_positions:
            end_row = start_row + len(seq) - 1
            # Every non -1 entry in the corresponding rows is one answer.
            ans_count = np.sum(self.selected_ends[start_row:end_row, :] != -1.)
            self.ans_per_sample_in_a_batch.append(
                self.ans_per_sample_in_a_batch[-1] + ans_count)
            start_row = end_row

    def _get_valid_seleceted_ids(self, mat):  # fixed: parameter was `slef`
        """Flatten a (rows, beam_size) selection matrix into
        [value, [row, col]] pairs, dropping trailing -1 slots per row."""
        flattened = []
        height, width = mat.shape
        for i in range(height):
            for j in range(width):
                if mat[i][j] == -1.:
                    break
                flattened.append([int(mat[i][j]), [i, j]])
        return flattened

    def decoding(self):
        """Run the backward walk over the three beams.

        Returns:
            list (one entry per sample) of dicts with keys "score" and
            "label" = [sentence_pos, start_span_pos, end_span_pos], sorted by
            descending score within each sample.
        """
        self._build_seq_info_for_each_beam()
        self._cal_ans_per_sample_in_a_batch()

        # Step 1: seed answers from the end-position beam.
        seq_id = 0
        sub_seq_id = 0
        sub_seq_count = len(self.beam3_seq_start_positions[seq_id]) - 1
        sub_seq_num, beam_size = self.selected_ends.shape
        for i in range(sub_seq_num):  # was xrange (Py2-only)
            seq_offset_in_batch = \
                self.beam3_seq_start_positions[seq_id][sub_seq_id]
            for j in range(beam_size):
                end_pos = int(self.selected_ends[i][j])
                if end_pos == -1:
                    break
                self.all_searched_ans.append({
                    "score": self.end_scores[seq_offset_in_batch + end_pos],
                    "sentence_pos": -1,
                    "start_span_pos": -1,
                    "end_span_pos": end_pos,
                    "parent_ids_in_prev_beam": i
                })
            sub_seq_id += 1
            if sub_seq_id == sub_seq_count:
                seq_id += 1
                if seq_id == len(self.beam3_seq_start_positions):
                    break
                sub_seq_id = 0
                sub_seq_count = len(
                    self.beam3_seq_start_positions[seq_id]) - 1

        assert (len(self.all_searched_ans) ==
                self.ans_per_sample_in_a_batch[-1])

        # Step 2: resolve start positions via parent pointers into beam 2.
        seq_id = 0
        sub_seq_id = 0
        sub_seq_count = len(self.beam2_seq_start_positions[seq_id]) - 1
        last_row_id = None
        starts = self._get_valid_seleceted_ids(self.selected_starts)
        for i, ans in enumerate(self.all_searched_ans):
            ans["start_span_pos"] = starts[ans["parent_ids_in_prev_beam"]][0]
            seq_offset_in_batch = (
                self.beam2_seq_start_positions[seq_id][sub_seq_id])
            ans["score"] += self.start_scores[(
                seq_offset_in_batch + ans["start_span_pos"])]
            ans["parent_ids_in_prev_beam"] = \
                starts[ans["parent_ids_in_prev_beam"]][1][0]

            # Advance sub-sequence bookkeeping when the parent row changes.
            if last_row_id and last_row_id != ans["parent_ids_in_prev_beam"]:
                sub_seq_id += 1
            if sub_seq_id == sub_seq_count:
                seq_id += 1
                if seq_id == len(self.beam2_seq_start_positions):
                    break
                sub_seq_count = len(
                    self.beam2_seq_start_positions[seq_id]) - 1
                sub_seq_id = 0
            last_row_id = ans["parent_ids_in_prev_beam"]

        # Step 3: resolve sentence positions via parent pointers into beam 1.
        offset_info = [0]
        for sen in self.beam1_seq_start_positions[:-1]:
            offset_info.append(offset_info[-1] + len(sen) - 1)
        sen_ids = self._get_valid_seleceted_ids(self.selected_sentences)
        for ans in self.all_searched_ans:
            ans["sentence_pos"] = sen_ids[ans["parent_ids_in_prev_beam"]][0]
            # Floor division (was `/`): under Py3 true division would yield a
            # float and break list indexing below.
            row_id = ans["parent_ids_in_prev_beam"] // beam_size
            offset = offset_info[row_id - 1] if row_id else 0
            ans["score"] += self.sentence_scores[offset + ans["sentence_pos"]]

        # Step 4: sort the answers of each sample by descending score.
        for i in range(len(self.ans_per_sample_in_a_batch) - 1):
            start_pos = self.ans_per_sample_in_a_batch[i]
            end_pos = self.ans_per_sample_in_a_batch[i + 1]
            for ans in sorted(
                    self.all_searched_ans[start_pos:end_pos],
                    key=lambda x: x["score"],
                    reverse=True):
                self.final_ans[i].append({
                    "score": ans["score"],
                    "label": [
                        ans["sentence_pos"], ans["start_span_pos"],
                        ans["end_span_pos"]
                    ]
                })

        return self.final_ans
globally_normalized_reader/config.py
浏览文件 @
431f46fb
...
...
@@ -27,10 +27,13 @@ class TrainerConfig(object):
data_dir
=
"data/featurized"
save_dir
=
"models"
train_batch_size
=
4
*
10
test_batch_size
=
1
use_gpu
=
True
trainer_count
=
4
train_batch_size
=
trainer_count
*
10
epochs
=
100
test_batch_size
=
4
epochs
=
20
# for debug print, if set to 0, no information will be printed.
show_parameter_status_period
=
0
...
...
globally_normalized_reader/infer.py
浏览文件 @
431f46fb
...
...
@@ -5,7 +5,6 @@ import sys
import
gzip
import
logging
import
numpy
as
np
import
pdb
import
paddle.v2
as
paddle
from
paddle.v2.layer
import
parse_network
...
...
@@ -14,6 +13,7 @@ import reader
from
model
import
GNR
from
train
import
choose_samples
from
config
import
ModelConfig
,
TrainerConfig
from
beam_decoding
import
BeamDecoding
logger
=
logging
.
getLogger
(
"paddle"
)
logger
.
setLevel
(
logging
.
INFO
)
...
...
@@ -27,67 +27,44 @@ def load_reverse_dict(dict_file):
return
word_dict
def parse_one_sample(raw_input_doc, sub_sen_scores, selected_sentence,
                     start_span_scores, selected_starts, end_span_scores,
                     selected_ends):
    """Decode the answers for one sample from the three beam outputs.

    Walks the end-position beam backwards through its parent pointers to
    attach the start position and sentence index, accumulating the three
    partial scores into one answer score.

    Args:
        raw_input_doc: list of sentences of the document (one per row of
            sub_sen_scores).
        sub_sen_scores: flat per-sentence scores.
        selected_sentence: (rows, beam_size) sentence indices; -1 = unused.
        start_span_scores: flat per-start-position scores.
        selected_starts: (rows, beam_size) start positions.
        end_span_scores: flat per-end-position scores.
        selected_ends: (rows, beam_size) end positions; -1 = unused.

    Returns:
        List of answer dicts with keys 'score', 'sentence_pos',
        'start_span_pos', 'end_span_pos', 'parent_ids_in_prev_beam',
        sorted by descending score.
    """
    assert len(raw_input_doc) == sub_sen_scores.shape[0]
    beam_size = selected_sentence.shape[1]

    # Seed one candidate answer per valid end-position beam entry.
    all_searched_ans = []
    for i in range(selected_ends.shape[0]):  # was xrange (Py2-only)
        for j in range(selected_ends.shape[1]):
            if selected_ends[i][j] == -1.:
                # -1 marks the end of valid entries in this beam row.
                break
            all_searched_ans.append({
                'score': end_span_scores[int(selected_ends[i][j])],
                'sentence_pos': -1,
                'start_span_pos': -1,
                'end_span_pos': int(selected_ends[i][j]),
                'parent_ids_in_prev_beam': i
            })

    # Resolve the start position each end candidate descended from.
    for path in all_searched_ans:
        # Floor division (was `/`): under Py3 true division yields a float
        # and would crash the array indexing below.
        row_id = path['parent_ids_in_prev_beam'] // beam_size
        col_id = path['parent_ids_in_prev_beam'] % beam_size
        path['start_span_pos'] = int(selected_starts[row_id][col_id])
        path['score'] += start_span_scores[path['start_span_pos']]
        path['parent_ids_in_prev_beam'] = row_id

    # Resolve the sentence each start candidate descended from.
    for path in all_searched_ans:
        row_id = path['parent_ids_in_prev_beam'] // beam_size
        col_id = path['parent_ids_in_prev_beam'] % beam_size
        path['sentence_pos'] = int(selected_sentence[row_id][col_id])
        path['score'] += sub_sen_scores[path['sentence_pos']]

    all_searched_ans.sort(key=lambda x: x['score'], reverse=True)
    return all_searched_ans
def print_result(test_batch, predicted_ans, ids_2_word, print_top_k=1):
    """Pretty-print queries, documents, gold spans and predicted answers.

    Args:
        test_batch: list of samples; each sample is indexed as
            [0]=query word ids, [1]=document sentences (word-id lists),
            [3]=gold sentence index, [4]=gold start, [5]=gold span length-1.
            # assumes this layout from the indexing below — TODO confirm
            # against the data reader.
        predicted_ans: per-sample list of {"score", "label"} dicts, as
            returned by BeamDecoding.decoding().
        ids_2_word: id -> word lookup for de-indexing.
        print_top_k: number of top predictions to print per sample.
    """
    for i, sample in enumerate(test_batch):
        query_words = [ids_2_word[ids] for ids in sample[0]]
        print("query:\t%s" % (" ".join(query_words)))

        print("documents:")
        # Gold span boundaries are loop-invariant; hoisted out of the
        # sentence loop (the original recomputed them per sentence and
        # would hit a NameError for an empty document).
        start = sample[4]
        end = sample[4] + sample[5] + 1
        for j, sen in enumerate(sample[1]):
            sen_words = [ids_2_word[ids] for ids in sen]
            print("%d\t%s" % (j, " ".join(sen_words)))
        print("gold:\t[%d %d %d] %s" % (sample[3], sample[4], sample[5], " ".join(
            [ids_2_word[ids] for ids in sample[1][sample[3]][start:end]])))

        print("predicted:")
        for k in range(print_top_k):
            label = predicted_ans[i][k]["label"]
            start = label[1]
            end = label[1] + label[2] + 1
            ans_words = [
                ids_2_word[ids] for ids in sample[1][label[0]][start:end]
            ]
            print("%.4f\t[%d %d %d] %s" %
                  (predicted_ans[i][k]["score"], label[0], label[1], label[2],
                   " ".join(ans_words)))
        print("\n")
def infer_a_batch(inferer, test_batch, ids_2_word, out_layer_count):
    """Run inference on one batch and print per-sample and batch-decoded
    answers.

    Args:
        inferer: a paddle.inference.Inference instance.
        test_batch: list of samples ([0]=query ids, [1]=document sentences,
            [3]/[4]/[5]=gold sentence/start/length fields).
        ids_2_word: id -> word lookup for de-indexing.
        out_layer_count: number of output layers.
            # NOTE(review): not read in this body — confirm callers need it.
    """
    outs = inferer.infer(input=test_batch, flatten_result=False, field="value")

    for test_sample in test_batch:
        query_word = [ids_2_word[ids] for ids in test_sample[0]]
        print("query\n\t%s\ndocument" % (" ".join(query_word)))

        # Iterate over each sentence in the document.
        for i, sentence in enumerate(test_sample[1]):
            sen_word = [ids_2_word[ids] for ids in sentence]
            print("%d\t%s" % (i, " ".join(sen_word)))
        print("gold\t[%d %d %d]" %
              (test_sample[3], test_sample[4], test_sample[5]))

        ans = parse_one_sample(test_sample[1], *outs)[0]
        # NOTE(review): slice end is start + end_span_pos with no +1, unlike
        # print_result's start + length + 1 — confirm end_span_pos semantics.
        ans_ids = test_sample[1][ans['sentence_pos']][ans['start_span_pos']:
                                                      ans['start_span_pos'] +
                                                      ans['end_span_pos']]
        ans_str = " ".join([ids_2_word[ids] for ids in ans_ids])
        print("searched answer\t[%d %d %d]\n\t%s" %
              (ans['sentence_pos'], ans['start_span_pos'],
               ans['end_span_pos'], ans_str))

    decoder = BeamDecoding([sample[1] for sample in test_batch], *outs)
    print_result(
        test_batch, decoder.decoding(), ids_2_word, print_top_k=10)
def
infer
(
model_path
,
data_dir
,
test_batch_size
,
config
):
assert
os
.
path
.
exists
(
model_path
),
"The model does not exist."
paddle
.
init
(
use_gpu
=
Fals
e
,
trainer_count
=
1
)
paddle
.
init
(
use_gpu
=
Tru
e
,
trainer_count
=
1
)
ids_2_word
=
load_reverse_dict
(
config
.
dict_path
)
...
...
@@ -96,6 +73,8 @@ def infer(model_path, data_dir, test_batch_size, config):
# load the trained models
parameters
=
paddle
.
parameters
.
Parameters
.
from_tar
(
gzip
.
open
(
model_path
,
"r"
))
logger
.
info
(
"loading parameter is done."
)
inferer
=
paddle
.
inference
.
Inference
(
output_layer
=
outputs
,
parameters
=
parameters
)
...
...
@@ -115,5 +94,6 @@ def infer(model_path, data_dir, test_batch_size, config):
if
__name__
==
"__main__"
:
infer
(
"models/pass_00003.tar.gz"
,
TrainerConfig
.
data_dir
,
# infer("models/round1/pass_00000.tar.gz", TrainerConfig.data_dir,
infer
(
"models/round2_on_cpu/pass_00000.tar.gz"
,
TrainerConfig
.
data_dir
,
TrainerConfig
.
test_batch_size
,
ModelConfig
)
globally_normalized_reader/model.py
浏览文件 @
431f46fb
#!/usr/bin/env python
#coding=utf-8
import
pdb
import
paddle.v2
as
paddle
from
paddle.v2.layer
import
parse_network
import
basic_modules
...
...
@@ -35,6 +33,7 @@ def encode_question(input_embedding, config, prefix):
act
=
paddle
.
activation
.
Linear
())
weights
=
paddle
.
layer
.
fc
(
input
=
lstm_outs
,
size
=
1
,
bias_attr
=
False
,
act
=
paddle
.
activation
.
SequenceSoftmax
())
weighted_candidates
=
paddle
.
layer
.
scaling
(
input
=
candidates
,
weight
=
weights
)
passage_indep_embedding
=
paddle
.
layer
.
pooling
(
...
...
@@ -63,7 +62,12 @@ def question_aligned_passage_embedding(question_lstm_outs, document_embeddings,
weights
=
paddle
.
layer
.
fc
(
input
=
[
question_lstm_outs
,
doc_word_expand
],
param_attr
=
[
paddle
.
attr
.
Param
(
initial_std
=
1e-3
),
paddle
.
attr
.
Param
(
initial_std
=
1e-3
)
],
size
=
1
,
bias_attr
=
False
,
act
=
paddle
.
activation
.
SequenceSoftmax
())
weighted_candidates
=
paddle
.
layer
.
scaling
(
input
=
question_outs_proj
,
weight
=
weights
)
...
...
@@ -111,20 +115,26 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config,
is_infer
):
last_state_of_sentence
=
paddle
.
layer
.
last_seq
(
input
=
doc_lstm_outs
,
agg_level
=
paddle
.
layer
.
AggregateLevel
.
TO_SEQUENCE
)
sentence_scores
=
paddle
.
layer
.
fc
(
input
=
last_state_of_sentence
,
size
=
1
,
bias_attr
=
False
,
act
=
paddle
.
activation
.
Linear
())
sentence_scores
=
paddle
.
layer
.
fc
(
input
=
last_state_of_sentence
,
size
=
1
,
bias_attr
=
False
,
param_attr
=
paddle
.
attr
.
Param
(
initial_std
=
1e-3
),
act
=
paddle
.
activation
.
Linear
())
topk_sentence_ids
=
paddle
.
layer
.
kmax_sequence_score
(
input
=
sentence_scores
,
beam_size
=
config
.
beam_size
)
topk_sen
=
paddle
.
layer
.
sub_nested_seq
(
input
=
doc_lstm_outs
,
selected_indices
=
topk_sentence_ids
)
# expand beam to search start positions on selected sentences
start_pos_scores
=
paddle
.
layer
.
fc
(
input
=
topk_sen
,
size
=
1
,
bias_attr
=
False
,
act
=
paddle
.
activation
.
Linear
())
start_pos_scores
=
paddle
.
layer
.
fc
(
input
=
topk_sen
,
size
=
1
,
layer_attr
=
paddle
.
attr
.
ExtraLayerAttribute
(
error_clipping_threshold
=
10.0
),
bias_attr
=
False
,
param_attr
=
paddle
.
attr
.
Param
(
initial_std
=
1e-3
),
act
=
paddle
.
activation
.
Linear
())
topk_start_pos_ids
=
paddle
.
layer
.
kmax_sequence_score
(
input
=
start_pos_scores
,
beam_size
=
config
.
beam_size
)
topk_start_spans
=
paddle
.
layer
.
seq_slice
(
...
...
@@ -137,10 +147,12 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config,
depth
=
config
.
lstm_depth
,
drop_rate
=
config
.
lstm_hidden_droprate
,
prefix
=
"__end_span_embeddings__"
)
end_pos_scores
=
paddle
.
layer
.
fc
(
input
=
end_span_embedding
,
size
=
1
,
bias_attr
=
False
,
act
=
paddle
.
activation
.
Linear
())
end_pos_scores
=
paddle
.
layer
.
fc
(
input
=
end_span_embedding
,
size
=
1
,
bias_attr
=
False
,
param_attr
=
paddle
.
attr
.
Param
(
initial_std
=
1e-3
),
act
=
paddle
.
activation
.
Linear
())
topk_end_pos_ids
=
paddle
.
layer
.
kmax_sequence_score
(
input
=
end_pos_scores
,
beam_size
=
config
.
beam_size
)
...
...
globally_normalized_reader/train.py
浏览文件 @
431f46fb
...
...
@@ -2,7 +2,6 @@
#coding=utf-8
from
__future__
import
print_function
import
pdb
import
os
import
sys
import
logging
...
...
@@ -128,8 +127,12 @@ def build_event_handler(config, parameters, trainer, test_reader):
if
isinstance
(
event
,
paddle
.
event
.
EndIteration
):
if
event
.
batch_id
and
\
(
not
event
.
batch_id
%
config
.
checkpoint_period
):
# save_path = os.path.join(config.save_dir,
# "checkpoint_param.latest.tar.gz")
save_path
=
os
.
path
.
join
(
config
.
save_dir
,
"checkpoint_param.latest.tar.gz"
)
"pass_%05d_%03d.tar.gz"
%
(
event
.
pass_id
,
event
.
batch_id
))
save_model
(
save_path
,
parameters
)
if
event
.
batch_id
and
not
event
.
batch_id
%
config
.
log_period
:
...
...
@@ -156,23 +159,27 @@ def train(model_config, trainer_config):
if
not
os
.
path
.
exists
(
trainer_config
.
save_dir
):
os
.
mkdir
(
trainer_config
.
save_dir
)
paddle
.
init
(
use_gpu
=
True
,
trainer_count
=
4
)
paddle
.
init
(
use_gpu
=
trainer_config
.
use_gpu
,
trainer_count
=
trainer_config
.
trainer_count
)
# define the optimizer
optimizer
=
paddle
.
optimizer
.
Adam
(
learning_rate
=
trainer_config
.
learning_rate
,
gradient_clipping_threshold
=
50
,
regularization
=
paddle
.
optimizer
.
L2Regularization
(
rate
=
5e-4
),
model_average
=
paddle
.
optimizer
.
ModelAverage
(
average_window
=
0.5
))
model_average
=
paddle
.
optimizer
.
ModelAverage
(
average_window
=
0.5
,
max_average_window
=
1000
))
# define network topology
loss
=
GNR
(
model_config
)
parameters
=
paddle
.
parameters
.
create
(
loss
)
show_parameter_init_info
(
parameters
)
if
trainer_config
.
init_model_path
:
load_initial_model
(
trainer_config
.
init_model_path
,
parameters
)
else
:
show_parameter_init_info
(
parameters
)
# load the pre-trained embeddings
parameters
.
set
(
"GloveVectors"
,
load_pretrained_parameters
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录