Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
9f27a5ca
M
models
项目概览
PaddlePaddle
/
models
1 年多 前同步成功
通知
226
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9f27a5ca
编写于
6月 21, 2019
作者:
L
lilu
提交者:
Yibing Liu
6月 21, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
test=develop (#2314)
上级
d47e15e0
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
101 additions
and
90 deletions
+101
-90
PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/evaluation.py
...ogue_model_toolkit/auto_dialogue_evaluation/evaluation.py
+14
-11
PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/init.py
...P/dialogue_model_toolkit/auto_dialogue_evaluation/init.py
+5
-7
PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/main.py
...P/dialogue_model_toolkit/auto_dialogue_evaluation/main.py
+82
-72
未找到文件。
PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/evaluation.py
浏览文件 @
9f27a5ca
...
...
@@ -6,15 +6,16 @@ import sys
import
numpy
as
np
import
pandas
as
pd
def get_p_at_n_in_m(data, n, m, ind):
    """
    Get n in m

    Check whether the entry at ``ind`` ranks within the top ``n`` of the
    ``m`` consecutive entries starting at ``ind``.

    Args:
        data: indexable, sliceable sequence of rows; element 0 of each row
            is used as the score. (Callers appear to pass (score, label)
            pairs -- only the score is read here.)
        n: rank cut-off, 1-based.
        m: number of consecutive rows, starting at ``ind``, forming the
            candidate window.
        ind: index of the reference row inside ``data``.

    Returns:
        1 if the n-th highest score in the window is less than or equal to
        the reference row's score (ties count in the reference's favour),
        otherwise 0.

    Raises:
        IndexError: if ``ind`` is out of range, or the window holds fewer
            than ``n`` rows.
    """
    pos_score = data[ind][0]

    # Rank the candidate window by score, highest first.
    window = sorted(data[ind:ind + m], key=lambda row: row[0], reverse=True)

    # The reference is in the top n exactly when the n-th best score does
    # not beat it.
    if window[n - 1][0] <= pos_score:
        return 1
    return 0
...
...
@@ -27,13 +28,14 @@ def evaluate_Recall(data):
p_at_1_in_10
=
0.0
p_at_2_in_10
=
0.0
p_at_5_in_10
=
0.0
length
=
len
(
data
)
/
10
for
i
in
xrange
(
0
,
length
):
length
=
len
(
data
)
//
10
print
(
'length=%s'
%
length
)
for
i
in
range
(
0
,
length
):
ind
=
i
*
10
assert
data
[
ind
][
1
]
==
1
p_at_1_in_2
+=
get_p_at_n_in_m
(
data
,
1
,
2
,
ind
)
p_at_1_in_10
+=
get_p_at_n_in_m
(
data
,
1
,
10
,
ind
)
p_at_2_in_10
+=
get_p_at_n_in_m
(
data
,
2
,
10
,
ind
)
...
...
@@ -43,8 +45,9 @@ def evaluate_Recall(data):
'1_in_2'
:
p_at_1_in_2
/
length
,
'1_in_10'
:
p_at_1_in_10
/
length
,
'2_in_10'
:
p_at_2_in_10
/
length
,
'5_in_10'
:
p_at_5_in_10
/
length
}
'5_in_10'
:
p_at_5_in_10
/
length
}
return
recall_dict
...
...
PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/init.py
浏览文件 @
9f27a5ca
...
...
@@ -26,9 +26,8 @@ import copy
import
numpy
as
np
import
paddle.fluid
as
fluid
def
init_pretraining_params
(
exe
,
pretraining_params_path
,
main_program
):
def
init_pretraining_params
(
exe
,
pretraining_params_path
,
main_program
):
"""
Init pretraining params
"""
...
...
@@ -36,9 +35,9 @@ def init_pretraining_params(exe,
),
"[%s] cann't be found."
%
pretraining_params_path
def
existed_params
(
var
):
"""
Test existed
"""
"""
Test existed
"""
if
not
isinstance
(
var
,
fluid
.
framework
.
Parameter
):
return
False
return
os
.
path
.
exists
(
os
.
path
.
join
(
pretraining_params_path
,
var
.
name
))
...
...
@@ -50,4 +49,3 @@ def init_pretraining_params(exe,
predicate
=
existed_params
)
print
(
"Load pretraining parameters from {}."
.
format
(
pretraining_params_path
))
PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/main.py
浏览文件 @
9f27a5ca
...
...
@@ -23,6 +23,7 @@ sys.path.append('../../models/dialogue_model_toolkit/auto_dialogue_evaluation/')
from
net
import
Network
import
config
def
train
(
args
):
"""Train
"""
...
...
@@ -45,8 +46,7 @@ def train(args):
fluid
.
clip
.
set_gradient_clip
(
clip
=
fluid
.
clip
.
GradientClipByValue
(
max
=
1.0
,
min
=-
1.0
))
optimizer
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
args
.
learning_rate
)
optimizer
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
args
.
learning_rate
)
optimizer
.
minimize
(
loss
)
print
(
"begin memory optimization ..."
)
fluid
.
memory_optimize
(
train_program
)
...
...
@@ -59,7 +59,7 @@ def train(args):
test_startup
.
random_seed
=
110
with
fluid
.
program_guard
(
test_program
,
test_startup
):
with
fluid
.
unique_name
.
guard
():
logits
,
loss
=
net
.
network
(
args
.
loss_type
)
logits
,
loss
=
net
.
network
(
args
.
loss_type
)
loss
.
persistable
=
True
logits
.
persistable
=
True
...
...
@@ -73,9 +73,8 @@ def train(args):
print
(
"device count %d"
%
dev_count
)
print
(
"theoretical memory usage: "
)
print
(
fluid
.
contrib
.
memory_usage
(
program
=
train_program
,
batch_size
=
args
.
batch_size
))
print
(
fluid
.
contrib
.
memory_usage
(
program
=
train_program
,
batch_size
=
args
.
batch_size
))
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
train_startup
)
...
...
@@ -127,49 +126,54 @@ def train(args):
"""
Evaluate to choose model
"""
val_batches
=
reader
.
batch_reader
(
args
.
val_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
1
)
val_batches
=
reader
.
batch_reader
(
args
.
val_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
1
)
scores
=
[]
labels
=
[]
for
batch
in
val_batches
:
scores
.
extend
(
test_with_feed
(
batch
))
labels
.
extend
([
x
[
0
]
for
x
in
batch
[
2
]])
return
eva
.
evaluate_Recall
(
zip
(
scores
,
labels
))
return
eva
.
evaluate_Recall
(
list
(
zip
(
scores
,
labels
)
))
def
save_exe
(
step
,
best_recall
):
"""
Save exe conditional
"""
recall_dict
=
evaluate
()
print
(
'evaluation recall result:'
)
print
(
'1_in_2: %s
\t
1_in_10: %s
\t
2_in_10: %s
\t
5_in_10: %s'
%
(
recall_dict
[
'1_in_2'
],
recall_dict
[
'1_in_10'
],
recall_dict
[
'2_in_10'
],
recall_dict
[
'5_in_10'
]))
print
(
'1_in_2: %s
\t
1_in_10: %s
\t
2_in_10: %s
\t
5_in_10: %s'
%
(
recall_dict
[
'1_in_2'
],
recall_dict
[
'1_in_10'
],
recall_dict
[
'2_in_10'
],
recall_dict
[
'5_in_10'
]))
if
recall_dict
[
'1_in_10'
]
>
best_recall
and
step
!=
0
:
fluid
.
io
.
save_inference_model
(
args
.
save_path
,
net
.
get_feed_inference_names
(),
logits
,
exe
,
main_program
=
train_program
)
fluid
.
io
.
save_inference_model
(
args
.
save_path
,
net
.
get_feed_inference_names
(),
logits
,
exe
,
main_program
=
train_program
)
print
(
"Save model at step %d ... "
%
step
)
print
(
time
.
strftime
(
'%Y-%m-%d %H:%M:%S'
,
time
.
localtime
(
time
.
time
())))
print
(
time
.
strftime
(
'%Y-%m-%d %H:%M:%S'
,
time
.
localtime
(
time
.
time
())))
best_recall
=
recall_dict
[
'1_in_10'
]
return
best_recall
# train over different epoches
global_step
,
train_time
=
0
,
0.0
best_recall
=
0
best_recall
=
0
for
epoch
in
six
.
moves
.
xrange
(
args
.
num_scan_data
):
train_batches
=
reader
.
batch_reader
(
args
.
train_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
args
.
sample_pro
)
train_batches
=
reader
.
batch_reader
(
args
.
train_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
args
.
sample_pro
)
begin_time
=
time
.
time
()
sum_cost
=
0
ce_cost
=
0
for
batch
in
train_batches
:
if
(
args
.
save_path
is
not
None
)
and
(
global_step
%
args
.
save_step
==
0
):
if
(
args
.
save_path
is
not
None
)
and
(
global_step
%
args
.
save_step
==
0
):
best_recall
=
save_exe
(
global_step
,
best_recall
)
cost
=
train_with_feed
(
batch
)
...
...
@@ -178,7 +182,8 @@ def train(args):
ce_cost
=
cost
.
mean
()
if
global_step
%
args
.
print_step
==
0
:
print
(
'training step %s avg loss %s'
%
(
global_step
,
sum_cost
/
args
.
print_step
))
print
(
'training step %s avg loss %s'
%
(
global_step
,
sum_cost
/
args
.
print_step
))
sum_cost
=
0
pass_time_cost
=
time
.
time
()
-
begin_time
...
...
@@ -187,7 +192,8 @@ def train(args):
.
format
(
epoch
,
"%2.2f sec"
%
pass_time_cost
))
if
"CE_MODE_X"
in
os
.
environ
and
epoch
==
args
.
num_scan_data
-
1
:
card_num
=
get_cards
()
print
(
"kpis
\t
train_duration_card%s
\t
%s"
%
(
card_num
,
pass_time_cost
))
print
(
"kpis
\t
train_duration_card%s
\t
%s"
%
(
card_num
,
pass_time_cost
))
print
(
"kpis
\t
train_loss_card%s
\t
%s"
%
(
card_num
,
ce_cost
))
...
...
@@ -232,7 +238,7 @@ def finetune(args):
test_startup
.
random_seed
=
110
with
fluid
.
program_guard
(
test_program
,
test_startup
):
with
fluid
.
unique_name
.
guard
():
logits
,
loss
=
net
.
network
(
args
.
loss_type
)
logits
,
loss
=
net
.
network
(
args
.
loss_type
)
loss
.
persistable
=
True
logits
.
persistable
=
True
...
...
@@ -246,9 +252,8 @@ def finetune(args):
print
(
"device count %d"
%
dev_count
)
print
(
"theoretical memory usage: "
)
print
(
fluid
.
contrib
.
memory_usage
(
program
=
train_program
,
batch_size
=
args
.
batch_size
))
print
(
fluid
.
contrib
.
memory_usage
(
program
=
train_program
,
batch_size
=
args
.
batch_size
))
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
train_startup
)
...
...
@@ -264,9 +269,7 @@ def finetune(args):
if
args
.
init_model
:
init
.
init_pretraining_params
(
exe
,
args
.
init_model
,
main_program
=
train_startup
)
exe
,
args
.
init_model
,
main_program
=
train_startup
)
print
(
'sccuess init %s'
%
args
.
init_model
)
print
(
"start loading data ..."
)
...
...
@@ -294,8 +297,8 @@ def finetune(args):
"""
Evaluate to choose model
"""
val_batches
=
reader
.
batch_reader
(
args
.
val_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
1
)
val_batches
=
reader
.
batch_reader
(
args
.
val_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
1
)
scores
=
[]
labels
=
[]
for
batch
in
val_batches
:
...
...
@@ -303,7 +306,7 @@ def finetune(args):
labels
.
extend
([
x
[
0
]
for
x
in
batch
[
2
]])
scores
=
[
x
[
0
]
for
x
in
scores
]
return
eva
.
evaluate_cor
(
scores
,
labels
)
def
save_exe
(
step
,
best_cor
):
"""
Save exe conditional
...
...
@@ -311,28 +314,32 @@ def finetune(args):
cor
=
evaluate
()
print
(
'evaluation cor relevance %s'
%
cor
)
if
cor
>
best_cor
and
step
!=
0
:
fluid
.
io
.
save_inference_model
(
args
.
save_path
,
net
.
get_feed_inference_names
(),
logits
,
exe
,
main_program
=
train_program
)
fluid
.
io
.
save_inference_model
(
args
.
save_path
,
net
.
get_feed_inference_names
(),
logits
,
exe
,
main_program
=
train_program
)
print
(
"Save model at step %d ... "
%
step
)
print
(
time
.
strftime
(
'%Y-%m-%d %H:%M:%S'
,
time
.
localtime
(
time
.
time
())))
print
(
time
.
strftime
(
'%Y-%m-%d %H:%M:%S'
,
time
.
localtime
(
time
.
time
())))
best_cor
=
cor
return
best_cor
# train over different epoches
global_step
,
train_time
=
0
,
0.0
best_cor
=
0.0
best_cor
=
0.0
pre_index
=
-
1
for
epoch
in
six
.
moves
.
xrange
(
args
.
num_scan_data
):
train_batches
=
reader
.
batch_reader
(
args
.
train_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
args
.
sample_pro
)
train_batches
=
reader
.
batch_reader
(
args
.
train_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
args
.
sample_pro
)
begin_time
=
time
.
time
()
sum_cost
=
0
for
batch
in
train_batches
:
if
(
args
.
save_path
is
not
None
)
and
(
global_step
%
args
.
save_step
==
0
):
if
(
args
.
save_path
is
not
None
)
and
(
global_step
%
args
.
save_step
==
0
):
best_cor
=
save_exe
(
global_step
,
best_cor
)
cost
=
train_with_feed
(
batch
)
...
...
@@ -340,7 +347,8 @@ def finetune(args):
sum_cost
+=
cost
.
mean
()
if
global_step
%
args
.
print_step
==
0
:
print
(
'training step %s avg loss %s'
%
(
global_step
,
sum_cost
/
args
.
print_step
))
print
(
'training step %s avg loss %s'
%
(
global_step
,
sum_cost
/
args
.
print_step
))
sum_cost
=
0
pass_time_cost
=
time
.
time
()
-
begin_time
...
...
@@ -361,36 +369,38 @@ def evaluate(args):
with
fluid
.
scope_guard
(
fluid
.
Scope
()):
infer_program
,
feed_target_names
,
fetch_vars
=
fluid
.
io
.
load_inference_model
(
args
.
init_model
,
exe
)
print
(
'init model %s'
%
args
.
init_model
)
global_step
,
infer_time
=
0
,
0.0
test_batches
=
reader
.
batch_reader
(
args
.
test_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
1
)
test_batches
=
reader
.
batch_reader
(
args
.
test_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
1
)
scores
=
[]
labels
=
[]
for
batch
in
test_batches
:
logits
=
exe
.
run
(
infer_program
,
feed
=
{
'context_wordseq'
:
batch
[
0
],
'response_wordseq'
:
batch
[
1
]
},
fetch_list
=
fetch_vars
)
logits
=
[
x
[
0
]
for
x
in
logits
[
0
]]
logits
=
exe
.
run
(
infer_program
,
feed
=
{
'context_wordseq'
:
batch
[
0
],
'response_wordseq'
:
batch
[
1
]
},
fetch_list
=
fetch_vars
)
logits
=
[
x
[
0
]
for
x
in
logits
[
0
]]
scores
.
extend
(
logits
)
labels
.
extend
([
x
[
0
]
for
x
in
batch
[
2
]])
mean_score
=
sum
(
scores
)
/
len
(
scores
)
print
(
'len scores: %s len labels: %s'
%
(
len
(
scores
),
len
(
labels
)))
mean_score
=
sum
(
scores
)
/
len
(
scores
)
if
args
.
loss_type
==
'CLS'
:
recall_dict
=
eva
.
evaluate_Recall
(
zip
(
scores
,
labels
))
recall_dict
=
eva
.
evaluate_Recall
(
list
(
zip
(
scores
,
labels
)
))
print
(
'mean score: %s'
%
mean_score
)
print
(
'evaluation recall result:'
)
print
(
'1_in_2: %s
\t
1_in_10: %s
\t
2_in_10: %s
\t
5_in_10: %s'
%
(
recall_dict
[
'1_in_2'
],
recall_dict
[
'1_in_10'
],
recall_dict
[
'2_in_10'
],
recall_dict
[
'5_in_10'
]))
print
(
'1_in_2: %s
\t
1_in_10: %s
\t
2_in_10: %s
\t
5_in_10: %s'
%
(
recall_dict
[
'1_in_2'
],
recall_dict
[
'1_in_10'
],
recall_dict
[
'2_in_10'
],
recall_dict
[
'5_in_10'
]))
elif
args
.
loss_type
==
'L2'
:
cor
=
eva
.
evaluate_cor
(
scores
,
labels
)
print
(
'mean score: %s
\n
evaluation cor resuls:%s'
%
(
mean_score
,
cor
))
print
(
'mean score: %s
\n
evaluation cor resuls:%s'
%
(
mean_score
,
cor
))
else
:
raise
ValueError
...
...
@@ -413,18 +423,17 @@ def infer(args):
args
.
init_model
,
exe
)
global_step
,
infer_time
=
0
,
0.0
test_batches
=
reader
.
batch_reader
(
args
.
test_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
1
)
test_batches
=
reader
.
batch_reader
(
args
.
test_path
,
args
.
batch_size
,
place
,
args
.
max_len
,
1
)
scores
=
[]
for
batch
in
test_batches
:
logits
=
exe
.
run
(
infer_program
,
feed
=
{
'context_wordseq'
:
batch
[
0
],
'response_wordseq'
:
batch
[
1
]
},
fetch_list
=
fetch_vars
)
logits
=
[
x
[
0
]
for
x
in
logits
[
0
]]
logits
=
exe
.
run
(
infer_program
,
feed
=
{
'context_wordseq'
:
batch
[
0
],
'response_wordseq'
:
batch
[
1
]
},
fetch_list
=
fetch_vars
)
logits
=
[
x
[
0
]
for
x
in
logits
[
0
]]
scores
.
extend
(
logits
)
...
...
@@ -433,7 +442,7 @@ def infer(args):
out_file
=
open
(
out_path
,
'w'
)
for
line
,
s
in
zip
(
in_file
,
scores
):
out_file
.
write
(
'%s
\t
%s
\n
'
%
(
line
.
strip
(),
s
))
in_file
.
close
()
out_file
.
close
()
...
...
@@ -471,5 +480,6 @@ def main():
else
:
raise
ValueError
if
__name__
==
'__main__'
:
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录