Unverified commit 8997bb63
Authored by guru4elephant on Oct 27, 2018; committed via GitHub on Oct 27, 2018
Parents: 81e9797a, 514fab8b

    Merge pull request #4 from PaddlePaddle/develop

    merge from remote

Showing 14 changed files with 774 additions and 464 deletions (+774, -464).
fluid/deep_attention_matching_net/douban/train.sh          +1    -0
fluid/deep_attention_matching_net/model.py                 +72   -32
fluid/deep_attention_matching_net/train_and_evaluate.py    +182  -64
fluid/deep_attention_matching_net/ubuntu/train.sh          +1    -0
fluid/deep_attention_matching_net/utils/reader.py          +14   -14
fluid/image_classification/dist_train/args.py              +0    -118
fluid/image_classification/dist_train/dist_train.py        +114  -78
fluid/image_classification/models/__init__.py              +2    -0
fluid/image_classification/models/learning_rate.py         +28   -0
fluid/image_classification/models/resnet_dist.py           +121  -0
fluid/machine_reading_comprehension/dataset.py             +19   -36
fluid/machine_reading_comprehension/rc_model.py            +38   -30
fluid/machine_reading_comprehension/run.py                 +179  -90
fluid/machine_reading_comprehension/run.sh                 +3    -2
fluid/deep_attention_matching_net/douban/train.sh  (+1, -0)

@@ -5,6 +5,7 @@ python -u ../train_and_evaluate.py --use_cuda \
   --ext_eval \
   --word_emb_init ./data/word_embedding.pkl \
   --save_path ./models \
+  --use_pyreader \
   --batch_size 256 \
   --vocab_size 172130 \
   --channel1_num 16 \
fluid/deep_attention_matching_net/model.py  (+72, -32)

@@ -15,45 +15,85 @@ class Net(object):
The constructor gains a feed-name registry, `self._feed_names = []`, next to the existing
`_stack_num`, `_channel1_num`, `_channel2_num`, `word_emb_name`, `use_stack_op`,
`use_mask_cache` and `use_sparse_embedding` members. The data layers that used to be built
inline at the top of `create_network()` move into two new methods, so the network can be fed
either through a py_reader or through the classic feed mechanism:

    def create_py_reader(self, capacity, name):
        # turns ids
        shapes = [[-1, self._max_turn_len, 1]
                  for i in six.moves.xrange(self._max_turn_num)]
        dtypes = ["int32" for i in six.moves.xrange(self._max_turn_num)]
        # turns mask
        shapes += [[-1, self._max_turn_len, 1]
                   for i in six.moves.xrange(self._max_turn_num)]
        dtypes += ["float32" for i in six.moves.xrange(self._max_turn_num)]
        # response ids, response mask, label
        shapes += [[-1, self._max_turn_len, 1], [-1, self._max_turn_len, 1],
                   [-1, 1]]
        dtypes += ["int32", "float32", "float32"]

        py_reader = fluid.layers.py_reader(
            capacity=capacity,
            shapes=shapes,
            lod_levels=[0] * (2 * self._max_turn_num + 3),
            dtypes=dtypes,
            name=name,
            use_double_buffer=True)

        data_vars = fluid.layers.read_file(py_reader)
        self.turns_data = data_vars[0:self._max_turn_num]
        self.turns_mask = data_vars[self._max_turn_num:2 * self._max_turn_num]
        self.response = data_vars[-3]
        self.response_mask = data_vars[-2]
        self.label = data_vars[-1]
        return py_reader

    def create_data_layers(self):
        self._feed_names = []

        self.turns_data = []
        for i in six.moves.xrange(self._max_turn_num):
            name = "turn_%d" % i
            turn = fluid.layers.data(
                name=name, shape=[self._max_turn_len, 1], dtype="int32")
            self.turns_data.append(turn)
            self._feed_names.append(name)

        self.turns_mask = []
        for i in six.moves.xrange(self._max_turn_num):
            name = "turn_mask_%d" % i
            turn_mask = fluid.layers.data(
                name=name, shape=[self._max_turn_len, 1], dtype="float32")
            self.turns_mask.append(turn_mask)
            self._feed_names.append(name)

        self.response = fluid.layers.data(
            name="response", shape=[self._max_turn_len, 1], dtype="int32")
        self.response_mask = fluid.layers.data(
            name="response_mask",
            shape=[self._max_turn_len, 1],
            dtype="float32")
        self.label = fluid.layers.data(
            name="label", shape=[1], dtype="float32")
        self._feed_names += ["response", "response_mask", "label"]

    def get_feed_names(self):
        return self._feed_names

`set_word_embedding(self, word_emb, place)` is kept unchanged, and `create_network()` keeps
its `mask_cache = dict() if self.use_mask_cache else None` preamble but no longer declares
the data layers itself.

@@ -71,8 +111,8 @@, @@ -80,7 +120,7 @@, @@ -96,8 +136,8 @@, @@ -111,8 +151,8 @@, @@ -120,8 +160,8 @@
Throughout `create_network()`, the former local variables are replaced by the instance
attributes set up above: `response` becomes `self.response`, `response_mask` becomes
`self.response_mask`, `turns_data[t]` becomes `self.turns_data[t]` and `turns_mask[t]`
becomes `self.turns_mask[t]`, both in the embedding lookups and in the q_mask/k_mask
arguments of the attention blocks.

@@ -158,5 +198,5 @@
     sim = fluid.layers.concat(input=sim_turns, axis=2)
     final_info = layers.cnn_3d(sim, self._channel1_num, self._channel2_num)
-    loss, logits = layers.loss(final_info, label)
+    loss, logits = layers.loss(final_info, self.label)
     return loss, logits
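For context, a minimal sketch (not part of this diff) of how a py_reader created this way is
typically driven, assuming a `dam` Net instance built on the default program and a generator
`data_provider` that yields batches in the same order as `create_data_layers()`:

    # Hypothetical usage sketch, assuming Paddle Fluid 1.x py_reader semantics.
    import paddle.fluid as fluid

    train_pyreader = dam.create_py_reader(capacity=10, name='train_reader')
    loss, logits = dam.create_network()

    # Batches come from the decorated provider instead of a feed dict.
    train_pyreader.decorate_tensor_provider(data_provider)

    exe = fluid.Executor(fluid.CUDAPlace(0))
    exe.run(fluid.default_startup_program())

    train_pyreader.start()
    while True:
        try:
            # No feed argument: the reader supplies the data layers.
            cost, = exe.run(fetch_list=[loss.name])
        except fluid.core.EOFException:
            # One pass over the provider is done; reset before the next epoch.
            train_pyreader.reset()
            break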
fluid/deep_attention_matching_net/train_and_evaluate.py  (+182, -64)

@@ -7,7 +7,7 @@
-from utils.util import print_arguments
+from utils.util import print_arguments, mkdir

@@ -49,6 +49,10 @@ def parse_args():
A new command-line switch is added after --use_cuda:

    parser.add_argument(
        '--use_pyreader',
        action='store_true',
        help='If set, use pyreader for reading data.')

@@ -105,7 +109,75 @@ def parse_args():
Three helpers are added after `#yapf: enable` so that evaluation and test-set scoring can be
shared by both data paths:

    def evaluate(score_path, result_file_path):
        if args.ext_eval:
            import utils.douban_evaluation as eva
        else:
            import utils.evaluation as eva
        #write evaluation result
        result = eva.evaluate(score_path)
        with open(result_file_path, 'w') as out_file:
            for p_at in result:
                out_file.write(str(p_at) + '\n')
        print('finish evaluation')
        print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))

    def test_with_feed(exe, program, feed_names, fetch_list, score_path,
                       batches, batch_num, dev_count):
        score_file = open(score_path, 'w')
        for it in six.moves.xrange(batch_num // dev_count):
            feed_list = []
            for dev in six.moves.xrange(dev_count):
                val_index = it * dev_count + dev
                batch_data = reader.make_one_batch_input(batches, val_index)
                feed_list.append(dict(zip(feed_names, batch_data)))
            predicts = exe.run(feed=feed_list, fetch_list=fetch_list)
            scores = np.array(predicts[0])
            for dev in six.moves.xrange(dev_count):
                val_index = it * dev_count + dev
                for i in six.moves.xrange(args.batch_size):
                    score_file.write(
                        str(scores[args.batch_size * dev + i][0]) + '\t' +
                        str(batches["label"][val_index][i]) + '\n')
        score_file.close()

    def test_with_pyreader(exe, program, pyreader, fetch_list, score_path,
                           batches, batch_num, dev_count):
        def data_provider():
            for index in six.moves.xrange(batch_num):
                yield reader.make_one_batch_input(batches, index)

        score_file = open(score_path, 'w')
        pyreader.decorate_tensor_provider(data_provider)
        it = 0
        pyreader.start()
        while True:
            try:
                predicts = exe.run(fetch_list=fetch_list)
                scores = np.array(predicts[0])
                for dev in six.moves.xrange(dev_count):
                    val_index = it * dev_count + dev
                    for i in six.moves.xrange(args.batch_size):
                        score_file.write(
                            str(scores[args.batch_size * dev + i][0]) + '\t' +
                            str(batches["label"][val_index][i]) + '\n')
                it += 1
            except fluid.core.EOFException:
                pyreader.reset()
                break
        score_file.close()

`train(args)` now starts by creating the save directory if needed:

    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

@@ -117,27 +189,47 @@ def train(args):
Program construction is restructured. Previously the network, the gradient clipping
(GradientClipByValue between -1.0 and 1.0), the Adam optimizer with
`fluid.layers.exponential_decay(learning_rate=args.learning_rate, decay_steps=400,
decay_rate=0.9, staircase=True)` and `fluid.memory_optimize` were set up on the default main
program, and the test program was obtained via `fluid.default_main_program().clone(for_test=True)`.
Now a dedicated `train_program` / `train_startup` pair is built under `fluid.program_guard`
and `fluid.unique_name.guard()`; inside it the model either calls
`dam.create_py_reader(capacity=10, name='train_reader')` (when `args.use_pyreader` is set)
or `dam.create_data_layers()`, then builds the network (marking loss and logits persistable),
applies the same gradient clipping and Adam/exponential-decay optimizer, and runs
`fluid.memory_optimize(train_program)`. A separate `test_program` / `test_startup` pair is
built the same way (with a `test_reader` py_reader when enabled) and finally cloned with
`test_program = test_program.clone(for_test=True)`.

@@ -152,7 +244,8 @@
-    exe.run(fluid.default_startup_program())
+    exe.run(train_startup)
+    exe.run(test_startup)

@@ -162,11 +255,6 @@
The `import utils.douban_evaluation as eva` / `import utils.evaluation as eva` selection is
removed from `train()`; it now lives in the `evaluate()` helper above.

@@ -199,17 +287,15 @@ and @@ -226,41 +312,73 @@
The per-epoch loop is split into two closures. `train_with_feed(step)` keeps the old
feed-based loop but now builds each feed dict with
`dict(zip(dam.get_feed_names(), reader.make_one_batch_input(train_batches, index)))`, saves
checkpoints with `fluid.io.save_persistables(exe, save_path, train_program)`, and delegates
test scoring and evaluation to `test_with_feed(...)` and `evaluate(score_path, result_file_path)`
instead of inlining them. `train_with_pyreader(step)` is new: it decorates the train
py_reader with a `data_provider()` generator over
`reader.make_one_batch_input(train_batches, index)`, loops on
`train_exe.run(fetch_list=[loss.name])` until `fluid.core.EOFException`, prints the average
cost every `print_step` steps, saves persistables and calls `test_with_pyreader(...)` /
`evaluate(...)` every `save_step` steps, and resets the reader on EOF. The epoch driver
becomes:

    # train over different epoches
    global_step = 0
    for epoch in six.moves.xrange(args.num_scan_data):
        shuffle_train = reader.unison_shuffle(train_data)
        train_batches = reader.build_batches(shuffle_train, data_conf)

        if args.use_pyreader:
            global_step = train_with_pyreader(global_step)
        else:
            global_step = train_with_feed(global_step)
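As a side note, the Adam learning-rate schedule kept by this change (exponential decay with
decay_steps=400, decay_rate=0.9, staircase=True) can be reproduced numerically; a small
plain-Python sketch, with a base learning rate of 1e-3 chosen only for illustration:

    def staircase_exponential_decay(base_lr, step, decay_steps=400, decay_rate=0.9):
        # staircase=True: the exponent only increases every `decay_steps` steps.
        return base_lr * decay_rate ** (step // decay_steps)

    # With base_lr = 1e-3:
    #   step    0..399  -> 1.000e-3
    #   step  400..799  -> 0.900e-3
    #   step  800..1199 -> 0.810e-3
    for step in (0, 400, 800, 1200):
        print(step, staircase_exponential_decay(1e-3, step))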
fluid/deep_attention_matching_net/ubuntu/train.sh  (+1, -0)

@@ -4,6 +4,7 @@ python -u ../train_and_evaluate.py --use_cuda \
   --data_path ./data/data.pkl \
   --word_emb_init ./data/word_embedding.pkl \
   --save_path ./models \
+  --use_pyreader \
   --batch_size 256 \
   --vocab_size 434512 \
   --emb_size 200 \
fluid/deep_attention_matching_net/utils/reader.py  (+14, -14)

@@ -202,30 +202,30 @@ def make_one_batch_input(data_batches, index):
The function used to assemble a dict keyed by data-layer name (`feed_dict["turn_%d" % i]`,
`feed_dict["turn_mask_%d" % i]`, `feed_dict["response"]`, `feed_dict["response_mask"]`,
`feed_dict["label"]`). It now builds and returns a positional list in the same order the
layers are declared, which both the feed path (zipped with `Net.get_feed_names()`) and the
py_reader path can consume:

    feed_list = []
    for i, turn in enumerate(turns_list):
        turn = np.expand_dims(turn, axis=-1)
        feed_list.append(turn)

    for i, turn_len in enumerate(every_turn_len_list):
        turn_mask = np.ones(
            (batch_size, max_turn_len, 1)).astype("float32")
        for row in six.moves.xrange(batch_size):
            turn_mask[row, turn_len[row]:, 0] = 0
        feed_list.append(turn_mask)

    response = np.expand_dims(response, axis=-1)
    feed_list.append(response)

    response_mask = np.ones(
        (batch_size, max_turn_len, 1)).astype("float32")
    for row in six.moves.xrange(batch_size):
        response_mask[row, response_len[row]:, 0] = 0
    feed_list.append(response_mask)

    label = np.array([data_batches["label"][index]]).reshape(
        [-1, 1]).astype("float32")
    feed_list.append(label)

    return feed_list
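Because make_one_batch_input now returns a positional list, the caller must zip it against
the layer names in the same order; a sketch of the pairing done in train_and_evaluate.py,
using a hypothetical two-turn configuration:

    # Hypothetical feed-name order for _max_turn_num = 2, matching
    # Net.create_data_layers() / Net.get_feed_names().
    feed_names = ["turn_0", "turn_1", "turn_mask_0", "turn_mask_1",
                  "response", "response_mask", "label"]

    batch_data = reader.make_one_batch_input(train_batches, index)  # ordered list
    assert len(batch_data) == len(feed_names)

    # The dict form is only needed on the feed-based path; the py_reader path
    # consumes the list directly via decorate_tensor_provider().
    feed_dict = dict(zip(feed_names, batch_data))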
fluid/image_classification/dist_train/args.py  (deleted, 100644 → 0, -118)

The whole file is removed. It held the Apache-2.0 license header, the
`BENCHMARK_MODELS = ["ResNet50", "ResNet101", "ResNet152"]` list, and a standalone
`parse_args()` for "Distributed Image Classification Training." exposing --model,
--batch_size, --learning_rate, --skip_batch_num, --iterations, --pass_num, --data_format,
--device, --gpus, --cpus, --data_set, --no_test, --memory_optimize, --update_method,
--no_split_var, --async_mode, --no_random, --reduce_strategy and --data_dir.
Argument parsing now lives directly in dist_train.py (see below), where the benchmark-only
options (--skip_batch_num, --iterations, --data_set, --no_random) are dropped and
--multi_batch_repeat is introduced.
fluid/image_classification/dist_train/dist_train.py  (+114, -78)

@@ -26,9 +26,84 @@
The `from args import *` import is dropped (the args module is deleted above) and an inline
`parse_args()` is defined right after the imports. It keeps the option names of the old
module but re-targets them at distributed ResNet training: `--model` defaults to
'DistResNet', `--batch_size` is documented as the minibatch size per device, a new
`--multi_batch_repeat` (default 1, 'Batch merge repeats.') is added, `--learning_rate`
defaults to 0.1 and `--pass_num` to 90; --data_format, --device, --gpus, --cpus, --no_test,
--memory_optimize, --update_method, --no_split_var, --async_mode, --reduce_strategy and
--data_dir are carried over, while the benchmark-only --skip_batch_num, --iterations,
--data_set and --no_random options are gone.

@@ -51,7 +126,7 @@ def get_model(args, is_train, main_prog, startup_prog):
-    model_def = models.__dict__[args.model]()
+    model_def = models.__dict__[args.model](layers=50, is_train=is_train)

@@ -60,89 +135,64 @@ def get_model(args, is_train, main_prog, startup_prog):
The training learning-rate schedule gains linear warmup and scales with the cluster size:

    if is_train:
        start_lr = args.learning_rate
        # n * worker * repeat
        end_lr = args.learning_rate * trainer_count * args.multi_batch_repeat
        total_images = 1281167 / trainer_count
        step = int(total_images /
                   (args.batch_size * args.gpus * args.multi_batch_repeat) + 1)
        warmup_steps = step * 5  # warmup 5 passes
        epochs = [30, 60, 80]
        bd = [step * e for e in epochs]
        base_lr = end_lr
        lr = []
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]

        optimizer = fluid.optimizer.Momentum(
            learning_rate=models.learning_rate.lr_warmup(
                fluid.layers.piecewise_decay(
                    boundaries=bd, values=lr),
                warmup_steps, start_lr, end_lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
        optimizer.minimize(avg_cost)

(The previous version computed `step` without the multi-batch factor, used epochs
[30, 60, 90], took `base_lr = args.learning_rate` and passed the piecewise_decay schedule to
Momentum directly.) In the same hunk, `append_nccl2_prepare(trainer_id, startup_prog)` is
rewritten: instead of guarding on `trainer_id >= 0`, manually creating an "NCCLID" RAW
variable and appending a gen_nccl_id op to the startup program (raising "must set positive
PADDLE_TRAINER_ID env variables for nccl-based dist train." otherwise), it now reads
PADDLE_TRAINER_ID, PADDLE_PSERVER_PORT, PADDLE_TRAINER_IPS and PADDLE_CURRENT_IP, builds the
worker endpoint list, and delegates to a DistributeTranspiler configured with
`config.mode = "nccl2"`, calling `t.transpile(trainer_id, trainers=','.join(worker_endpoints),
current_endpoint=current_endpoint, startup_program=startup_prog)`.

@@ -150,8 +200,6 @@ def dist_transpile(trainer_id, args, train_prog, startup_prog):
The "# NOTE: *MUST* use train_prog, for we are using with guard to generate different program
for train and test." comment above `program=train_prog` is removed; the transpile call itself
is unchanged.

@@ -171,7 +219,7 @@
-def test_parallel(exe, test_args, args, test_prog, feeder):
+def test_parallel(exe, test_args, args, test_prog):

@@ -190,13 +238,10 @@, @@ -237,31 +282,27 @@, @@ -269,20 +310,17 @@ def train_parallel(...):
The "# NOTE: only need to benchmark using parallelexe" comment and the unused `feeder = None`
are dropped. The test ParallelExecutor is now created with `scope=test_scope`, so the test
executor really runs in the empty scope that avoids reusing NCCLID. Inside the batch loop the
benchmark-style bookkeeping (`iters = 0`, `if iters == args.iterations: break`, the
`args.skip_batch_num` timer reset, `iters += 1`) is removed; the loss/accuracy fetch and the
progress print are instead gated on `batch_id % 30` / `batch_id % 1` (calling `exe.run([])`
on batches where nothing is fetched), and `pyreader.reset()` loses its trailing
"# reset reader handle" comment. Testing no longer passes a feeder:
`test_ret = test_parallel(test_exe, test_args, args, test_prog)`.

@@ -316,8 +354,6 @@ def main():
The `if args.no_random: fluid.default_startup_program().random_seed = 1` block is removed
along with the option itself.
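The NCCL2 path reads its topology entirely from environment variables; a hypothetical
two-node setup could look like the sketch below (the variable names are the ones read in
append_nccl2_prepare and dist_transpile, the IPs and port are made up):

    import os

    # Hypothetical settings for trainer 0 of a two-node NCCL2 job.
    os.environ["PADDLE_TRAINER_ID"] = "0"
    os.environ["PADDLE_PSERVER_PORT"] = "6174"
    os.environ["PADDLE_TRAINER_IPS"] = "192.168.0.1,192.168.0.2"
    os.environ["PADDLE_CURRENT_IP"] = "192.168.0.1"
    os.environ["PADDLE_TRAINING_ROLE"] = "TRAINER"

    # With --update_method nccl2 the script then derives:
    port = os.environ["PADDLE_PSERVER_PORT"]
    worker_endpoints = [ip + ":" + port
                        for ip in os.environ["PADDLE_TRAINER_IPS"].split(",")]
    current_endpoint = os.environ["PADDLE_CURRENT_IP"] + ":" + port
    print(worker_endpoints, current_endpoint)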
fluid/image_classification/models/__init__.py  (+2, -0)

@@ -3,6 +3,8 @@ from .mobilenet import MobileNet
 from .googlenet import GoogleNet
 from .vgg import VGG11, VGG13, VGG16, VGG19
 from .resnet import ResNet50, ResNet101, ResNet152
+from .resnet_dist import DistResNet
 from .inception_v4 import InceptionV4
 from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d
 from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131
+import learning_rate
浏览文件 @
8997bb63
...
@@ -20,3 +20,31 @@ def cosine_decay(learning_rate, step_each_epoch, epochs=120):
...
@@ -20,3 +20,31 @@ def cosine_decay(learning_rate, step_each_epoch, epochs=120):
decayed_lr
=
learning_rate
*
\
decayed_lr
=
learning_rate
*
\
(
ops
.
cos
(
epoch
*
(
math
.
pi
/
epochs
))
+
1
)
/
2
(
ops
.
cos
(
epoch
*
(
math
.
pi
/
epochs
))
+
1
)
/
2
return
decayed_lr
return
decayed_lr
def
lr_warmup
(
learning_rate
,
warmup_steps
,
start_lr
,
end_lr
):
""" Applies linear learning rate warmup for distributed training
Argument learning_rate can be float or a Variable
lr = lr + (warmup_rate * step / warmup_steps)
"""
assert
(
isinstance
(
end_lr
,
float
))
assert
(
isinstance
(
start_lr
,
float
))
linear_step
=
end_lr
-
start_lr
with
fluid
.
default_main_program
().
_lr_schedule_guard
():
lr
=
fluid
.
layers
.
tensor
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
True
,
name
=
"learning_rate_warmup"
)
global_step
=
fluid
.
layers
.
learning_rate_scheduler
.
_decay_step_counter
()
with
fluid
.
layers
.
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
global_step
<
warmup_steps
):
decayed_lr
=
start_lr
+
linear_step
*
(
global_step
/
warmup_steps
)
fluid
.
layers
.
tensor
.
assign
(
decayed_lr
,
lr
)
with
switch
.
default
():
fluid
.
layers
.
tensor
.
assign
(
learning_rate
,
lr
)
return
lr
\ No newline at end of file
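The warmup is linear from start_lr to end_lr over warmup_steps, after which the wrapped
schedule takes over; a plain-Python sketch of the resulting schedule (the concrete numbers
below are chosen only for illustration):

    def warmed_up_lr(step, warmup_steps, start_lr, end_lr, decayed_lr):
        # Mirrors lr_warmup(): linear ramp during warmup, wrapped schedule after.
        if step < warmup_steps:
            return start_lr + (end_lr - start_lr) * (step / float(warmup_steps))
        return decayed_lr

    # Illustration: ramp 0.1 -> 0.8 over five "passes" of 5000 steps each, then
    # hand over to whatever the piecewise-decay schedule currently yields.
    warmup_steps, start_lr, end_lr = 5 * 5000, 0.1, 0.8
    for step in (0, 12500, 25000, 30000):
        print(step, warmed_up_lr(step, warmup_steps, start_lr, end_lr, decayed_lr=0.8))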
fluid/image_classification/models/resnet_dist.py  (new file, 0 → 100644, +121)

    from __future__ import absolute_import
    from __future__ import division
    from __future__ import print_function
    import paddle
    import paddle.fluid as fluid
    import math

    __all__ = ["DistResNet"]

    train_parameters = {
        "input_size": [3, 224, 224],
        "input_mean": [0.485, 0.456, 0.406],
        "input_std": [0.229, 0.224, 0.225],
        "learning_strategy": {
            "name": "piecewise_decay",
            "batch_size": 256,
            "epochs": [30, 60, 90],
            "steps": [0.1, 0.01, 0.001, 0.0001]
        }
    }


    class DistResNet():
        def __init__(self, layers=50, is_train=True):
            self.params = train_parameters
            self.layers = layers
            self.is_train = is_train
            self.weight_decay = 1e-4

        def net(self, input, class_dim=1000):
            layers = self.layers
            supported_layers = [50, 101, 152]
            assert layers in supported_layers, \
                "supported layers are {} but input layer is {}".format(
                    supported_layers, layers)

            if layers == 50:
                depth = [3, 4, 6, 3]
            elif layers == 101:
                depth = [3, 4, 23, 3]
            elif layers == 152:
                depth = [3, 8, 36, 3]
            num_filters = [64, 128, 256, 512]

            conv = self.conv_bn_layer(
                input=input, num_filters=64, filter_size=7, stride=2, act='relu')
            conv = fluid.layers.pool2d(
                input=conv,
                pool_size=3,
                pool_stride=2,
                pool_padding=1,
                pool_type='max')

            for block in range(len(depth)):
                for i in range(depth[block]):
                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1)

            pool = fluid.layers.pool2d(
                input=conv, pool_size=7, pool_type='avg', global_pooling=True)
            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
            out = fluid.layers.fc(
                input=pool,
                size=class_dim,
                act='softmax',
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Uniform(-stdv, stdv),
                    regularizer=fluid.regularizer.L2Decay(self.weight_decay)),
                bias_attr=fluid.ParamAttr(
                    regularizer=fluid.regularizer.L2Decay(self.weight_decay)))
            return out

        def conv_bn_layer(self,
                          input,
                          num_filters,
                          filter_size,
                          stride=1,
                          groups=1,
                          act=None,
                          bn_init_value=1.0):
            conv = fluid.layers.conv2d(
                input=input,
                num_filters=num_filters,
                filter_size=filter_size,
                stride=stride,
                padding=(filter_size - 1) // 2,
                groups=groups,
                act=None,
                bias_attr=False,
                param_attr=fluid.ParamAttr(
                    regularizer=fluid.regularizer.L2Decay(self.weight_decay)))
            return fluid.layers.batch_norm(
                input=conv,
                act=act,
                is_test=not self.is_train,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(bn_init_value),
                    regularizer=None))

        def shortcut(self, input, ch_out, stride):
            ch_in = input.shape[1]
            if ch_in != ch_out or stride != 1:
                return self.conv_bn_layer(input, ch_out, 1, stride)
            else:
                return input

        def bottleneck_block(self, input, num_filters, stride):
            conv0 = self.conv_bn_layer(
                input=input, num_filters=num_filters, filter_size=1, act='relu')
            conv1 = self.conv_bn_layer(
                input=conv0,
                num_filters=num_filters,
                filter_size=3,
                stride=stride,
                act='relu')
            # NOTE: default bias is 0.0 already
            conv2 = self.conv_bn_layer(
                input=conv1,
                num_filters=num_filters * 4,
                filter_size=1,
                act=None,
                bn_init_value=0.0)
            short = self.shortcut(input, num_filters * 4, stride)
            return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
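A minimal sketch of how dist_train.py now instantiates this model; the input layer here is
only illustrative, since the real script feeds images through a py_reader:

    import paddle.fluid as fluid
    import models

    # dist_train.py passes layers and is_train explicitly:
    model_def = models.DistResNet(layers=50, is_train=True)

    # Illustrative static input layer for a single forward definition.
    image = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32')
    predict = model_def.net(image, class_dim=1000)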
fluid/machine_reading_comprehension/dataset.py  (+19, -36)

@@ -132,61 +132,44 @@ class BRCDataset(object):
Mini-batch assembly is reworked so that each sample only contributes the passages it really
has (tracked in a new `passage_num` field) instead of being padded out with empty passages,
and questions/passages are truncated up front:

- The per-batch dict gains a `'passage_num': []` entry next to 'start_id' / 'end_id'.
- `max_passage_num = min(self.max_p_num, max_passage_num)` is commented out and replaced by
  `max_passage_num = self.max_p_num`.
- Inside the per-sample loop a `count` of real passages is kept; question token ids are
  appended as `sample['question_token_ids'][0:self.max_q_len]` with length
  `min(len(sample['question_token_ids']), self.max_q_len)`, and passage token ids as
  `sample['passages'][pidx]['passage_token_ids'][0:self.max_p_len]`. The old `else` branch
  that appended empty question/passage entries is deleted.
- The gold answer span is now computed per sample right after its passages, using the real
  lengths of the preceding passages instead of a uniform padded length, as shown below.
- The `_dynamic_padding(self, batch_data, pad_id)` method (which padded passage and question
  token ids to `pad_p_len` / `pad_q_len` with `pad_id`) and its call site are removed
  entirely; the batch is left unpadded.

New per-sample span bookkeeping:

    # record the start passage index of current doc
    passade_idx_offset = sum(batch_data['passage_num'])
    batch_data['passage_num'].append(count)
    gold_passage_offset = 0
    if 'answer_passages' in sample and len(sample['answer_passages']):
        for i in range(sample['answer_passages'][0]):
            gold_passage_offset += len(batch_data['passage_token_ids'][
                passade_idx_offset + i])
        start_id = min(sample['answer_spans'][0][0], self.max_p_len)
        end_id = min(sample['answer_spans'][0][1], self.max_p_len)
        batch_data['start_id'].append(gold_passage_offset + start_id)
        batch_data['end_id'].append(gold_passage_offset + end_id)
    else:
        # fake span for some samples, only valid for testing
        batch_data['start_id'].append(0)
        batch_data['end_id'].append(0)
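The answer-span offset is thus computed from the true (truncated) lengths of the passages
that precede the gold passage, rather than from a uniform padded length; a small numeric
sketch with made-up lengths:

    # Suppose one sample contributed three passages with (truncated) lengths:
    passage_lengths = [120, 85, 300]
    answer_passage = 2           # the gold answer lives in the third passage
    answer_span = (17, 42)       # token span inside that passage

    # New behaviour: offset is the sum of the preceding passages' real lengths.
    gold_passage_offset = sum(passage_lengths[:answer_passage])    # 205
    start_id = gold_passage_offset + answer_span[0]                # 222
    end_id = gold_passage_offset + answer_span[1]                  # 247

    # Old behaviour used a uniform padded length instead:
    padded_p_len = 500
    old_start_id = padded_p_len * answer_passage + answer_span[0]  # 1017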
fluid/machine_reading_comprehension/rc_model.py
浏览文件 @
8997bb63
...
@@ -68,16 +68,23 @@ def bi_lstm_encoder(input_seq, gate_size, para_name, args):
...
@@ -68,16 +68,23 @@ def bi_lstm_encoder(input_seq, gate_size, para_name, args):
return
encoder_out
return
encoder_out
def
encoder
(
input_name
,
para_name
,
shape
,
hidden_size
,
args
):
def
get_data
(
input_name
,
lod_level
,
args
):
input_ids
=
layers
.
data
(
input_ids
=
layers
.
data
(
name
=
input_name
,
shape
=
[
1
],
dtype
=
'int64'
,
lod_level
=
1
)
name
=
input_name
,
shape
=
[
1
],
dtype
=
'int64'
,
lod_level
=
lod_level
)
return
input_ids
def
embedding
(
input_ids
,
shape
,
args
):
input_embedding
=
layers
.
embedding
(
input_embedding
=
layers
.
embedding
(
input
=
input_ids
,
input
=
input_ids
,
size
=
shape
,
size
=
shape
,
dtype
=
'float32'
,
dtype
=
'float32'
,
is_sparse
=
True
,
is_sparse
=
True
,
param_attr
=
fluid
.
ParamAttr
(
name
=
'embedding_para'
))
param_attr
=
fluid
.
ParamAttr
(
name
=
'embedding_para'
))
return
input_embedding
def
encoder
(
input_embedding
,
para_name
,
hidden_size
,
args
):
encoder_out
=
bi_lstm_encoder
(
encoder_out
=
bi_lstm_encoder
(
input_seq
=
input_embedding
,
input_seq
=
input_embedding
,
gate_size
=
hidden_size
,
gate_size
=
hidden_size
,
...
@@ -259,40 +266,41 @@ def fusion(g, args):
 def rc_model(hidden_size, vocab, args):
     emb_shape = [vocab.size(), vocab.embed_dim]
+    start_labels = layers.data(
+        name="start_lables", shape=[1], dtype='float32', lod_level=1)
+    end_labels = layers.data(
+        name="end_lables", shape=[1], dtype='float32', lod_level=1)
     # stage 1:encode
-    p_ids_names = []
-    q_ids_names = []
-    ms = []
-    gs = []
-    qs = []
-    for i in range(args.doc_num):
-        p_ids_name = "pids_%d" % i
-        p_ids_names.append(p_ids_name)
-        p_enc_i = encoder(p_ids_name, 'p_enc', emb_shape, hidden_size, args)
-        q_ids_name = "qids_%d" % i
-        q_ids_names.append(q_ids_name)
-        q_enc_i = encoder(q_ids_name, 'q_enc', emb_shape, hidden_size, args)
-        # stage 2:match
-        g_i = attn_flow(q_enc_i, p_enc_i, p_ids_name, args)
-        # stage 3:fusion
-        m_i = fusion(g_i, args)
-        ms.append(m_i)
-        gs.append(g_i)
-        qs.append(q_enc_i)
-    m = layers.sequence_concat(input=ms)
-    g = layers.sequence_concat(input=gs)
-    q_vec = layers.sequence_concat(input=qs)
+    q_id0 = get_data('q_id0', 1, args)
+    q_ids = get_data('q_ids', 2, args)
+    p_ids_name = 'p_ids'
+    p_ids = get_data('p_ids', 2, args)
+    p_embs = embedding(p_ids, emb_shape, args)
+    q_embs = embedding(q_ids, emb_shape, args)
+    drnn = layers.DynamicRNN()
+    with drnn.block():
+        p_emb = drnn.step_input(p_embs)
+        q_emb = drnn.step_input(q_embs)
+        p_enc = encoder(p_emb, 'p_enc', hidden_size, args)
+        q_enc = encoder(q_emb, 'q_enc', hidden_size, args)
+        # stage 2:match
+        g_i = attn_flow(q_enc, p_enc, p_ids_name, args)
+        # stage 3:fusion
+        m_i = fusion(g_i, args)
+        drnn.output(m_i, q_enc)
+    ms, q_encs = drnn()
+    p_vec = layers.lod_reset(x=ms, y=start_labels)
+    q_vec = layers.lod_reset(x=q_encs, y=q_id0)
     # stage 4:decode
     start_probs, end_probs = point_network_decoder(
-        p_vec=m, q_vec=q_vec, hidden_size=hidden_size, args=args)
-    start_labels = layers.data(
-        name="start_lables", shape=[1], dtype='float32', lod_level=1)
-    end_labels = layers.data(
-        name="end_lables", shape=[1], dtype='float32', lod_level=1)
+        p_vec=p_vec, q_vec=q_vec, hidden_size=hidden_size, args=args)
     cost0 = layers.sequence_pool(
         layers.cross_entropy(
...
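The key change in rc_model is that the Python-side loop over a fixed args.doc_num is replaced by a fluid.layers.DynamicRNN whose steps are the passages of each question, so the number of passages per question can vary at run time. A minimal sketch of the DynamicRNN pattern used here (toy vocabulary and layer sizes are assumptions; this is not the full match/fusion graph):

import paddle.fluid as fluid

layers = fluid.layers
# lod_level=2: outer level = passages per question, inner level = tokens per passage
p_ids = layers.data(name='p_ids', shape=[1], dtype='int64', lod_level=2)
p_embs = layers.embedding(input=p_ids, size=[1000, 8], dtype='float32')

drnn = layers.DynamicRNN()
with drnn.block():
    p_emb = drnn.step_input(p_embs)      # one step per passage (the outer LoD level is unrolled)
    hidden = layers.fc(input=p_emb, size=16)
    drnn.output(hidden)
passage_feats = drnn()                   # per-passage outputs concatenated back into one sequence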
@@ -308,5 +316,5 @@ def rc_model(hidden_size, vocab, args):
     cost = cost0 + cost1
     cost.persistable = True
-    feeding_list = q_ids_names + ["start_lables", "end_lables"] + p_ids_names
-    return cost, start_probs, end_probs, feeding_list
+    feeding_list = ["q_ids", "start_lables", "end_lables", "p_ids", "q_id0"]
+    return cost, start_probs, end_probs, ms, feeding_list
fluid/machine_reading_comprehension/run.py  View file @ 8997bb63
...
@@ -46,22 +46,32 @@ from vocab import Vocab
...
@@ -46,22 +46,32 @@ from vocab import Vocab
def
prepare_batch_input
(
insts
,
args
):
def
prepare_batch_input
(
insts
,
args
):
doc_num
=
args
.
doc_num
batch_size
=
len
(
insts
[
'raw_data'
])
batch_size
=
len
(
insts
[
'raw_data'
])
inst_num
=
len
(
insts
[
'passage_num'
])
if
batch_size
!=
inst_num
:
print
(
"data error %d, %d"
%
(
batch_size
,
inst_num
))
return
None
new_insts
=
[]
new_insts
=
[]
passage_idx
=
0
for
i
in
range
(
batch_size
):
for
i
in
range
(
batch_size
):
p_len
=
0
p_id
=
[]
p_id
=
[]
q_id
=
[]
p_ids
=
[]
p_ids
=
[]
q_ids
=
[]
q_ids
=
[]
p_len
=
0
q_id
=
[]
for
j
in
range
(
i
*
doc_num
,
(
i
+
1
)
*
doc_num
):
p_id_r
=
[]
p_ids
.
append
(
insts
[
'passage_token_ids'
][
j
])
p_ids_r
=
[]
p_id
=
p_id
+
insts
[
'passage_token_ids'
][
j
]
q_ids_r
=
[]
q_ids
.
append
(
insts
[
'question_token_ids'
][
j
])
q_id_r
=
[]
q_id
=
q_id
+
insts
[
'question_token_ids'
][
j
]
for
j
in
range
(
insts
[
'passage_num'
][
i
]):
p_ids
.
append
(
insts
[
'passage_token_ids'
][
passage_idx
+
j
])
p_id
=
p_id
+
insts
[
'passage_token_ids'
][
passage_idx
+
j
]
q_ids
.
append
(
insts
[
'question_token_ids'
][
passage_idx
+
j
])
q_id
=
q_id
+
insts
[
'question_token_ids'
][
passage_idx
+
j
]
passage_idx
+=
insts
[
'passage_num'
][
i
]
p_len
=
len
(
p_id
)
p_len
=
len
(
p_id
)
def
_get_label
(
idx
,
ref_len
):
def
_get_label
(
idx
,
ref_len
):
...
@@ -72,11 +82,46 @@ def prepare_batch_input(insts, args):
         start_label = _get_label(insts['start_id'][i], p_len)
         end_label = _get_label(insts['end_id'][i], p_len)
-        new_inst = q_ids + [start_label, end_label] + p_ids
+        new_inst = [q_ids, start_label, end_label, p_ids, q_id]
         new_insts.append(new_inst)
     return new_insts
 
+
+def batch_reader(batch_list, args):
+    res = []
+    for batch in batch_list:
+        res.append(prepare_batch_input(batch, args))
+    return res
+
+
+def read_multiple(reader, count, clip_last=True):
+    """
+    Stack data from reader for multi-devices.
+    """
+
+    def __impl__():
+        res = []
+        for item in reader():
+            res.append(item)
+            if len(res) == count:
+                yield res
+                res = []
+        if len(res) == count:
+            yield res
+        elif not clip_last:
+            data = []
+            for item in res:
+                data += item
+            if len(data) > count:
+                inst_num_per_part = len(data) // count
+                yield [
+                    data[inst_num_per_part * i:inst_num_per_part * (i + 1)]
+                    for i in range(count)
+                ]
+
+    return __impl__
+
+
 def LodTensor_Array(lod_tensor):
     lod = lod_tensor.lod()
     array = np.array(lod_tensor)
...
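read_multiple turns a batch-level reader into one that yields lists of count batches, one per device, clipping a trailing incomplete group by default. Its behavior on a toy reader (using the read_multiple defined above; the toy reader itself is only illustrative):

def toy_reader():
    for i in range(5):
        yield [i]                        # each item stands in for one mini-batch

stacked = read_multiple(toy_reader, count=2)
for group in stacked():
    print(group)                         # [[0], [1]] then [[2], [3]]; the leftover 5th batch is clipped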
@@ -103,7 +148,7 @@ def print_para(train_prog, train_exe, logger, args):
     logger.info("total param num: {0}".format(num_sum))
 
-def find_best_answer_for_passage(start_probs, end_probs, passage_len, args):
+def find_best_answer_for_passage(start_probs, end_probs, passage_len):
     """
     Finds the best answer with the maximum start_prob * end_prob from a single passage
     """
...
@@ -125,7 +170,7 @@ def find_best_answer_for_passage(start_probs, end_probs, passage_len, args):
     return (best_start, best_end), max_prob
 
-def find_best_answer(sample, start_prob, end_prob, padded_p_len, args):
+def find_best_answer_for_inst(sample, start_prob, end_prob, inst_lod):
     """
     Finds the best answer for a sample given start_prob and end_prob for each position.
     This will call find_best_answer_for_passage because there are multiple passages in a sample
...
@@ -134,11 +179,16 @@ def find_best_answer(sample, start_prob, end_prob, padded_p_len, args):
     for p_idx, passage in enumerate(sample['passages']):
         if p_idx >= args.max_p_num:
             continue
+        if len(start_prob) != len(end_prob):
+            logger.info('error: {}'.format(sample['question']))
+            continue
+        passage_start = inst_lod[p_idx] - inst_lod[0]
+        passage_end = inst_lod[p_idx + 1] - inst_lod[0]
+        passage_len = passage_end - passage_start
         passage_len = min(args.max_p_len, len(passage['passage_tokens']))
         answer_span, score = find_best_answer_for_passage(
-            start_prob[p_idx * padded_p_len:(p_idx + 1) * padded_p_len],
-            end_prob[p_idx * padded_p_len:(p_idx + 1) * padded_p_len],
-            passage_len, args)
+            start_prob[passage_start:passage_end],
+            end_prob[passage_start:passage_end], passage_len)
         if score > best_score:
             best_score = score
             best_p_idx = p_idx
...
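find_best_answer_for_inst now slices the flat start/end probability vectors with per-instance LoD offsets instead of a fixed padded_p_len; the underlying search in find_best_answer_for_passage is unchanged: pick the (start, end) pair with the largest start_prob * end_prob, with end >= start and the span length capped. A self-contained sketch of that search (the max_a_len cap is an assumption, not the project's exact implementation):

def best_span(start_probs, end_probs, passage_len, max_a_len=200):
    # Maximize start_probs[s] * end_probs[e] over s <= e < s + max_a_len.
    best, best_prob = (0, 0), 0.0
    for s in range(passage_len):
        for e in range(s, min(passage_len, s + max_a_len)):
            prob = start_probs[s] * end_probs[e]
            if prob > best_prob:
                best, best_prob = (s, e), prob
    return best, best_prob

print(best_span([0.1, 0.7, 0.2], [0.2, 0.1, 0.7], passage_len=3))  # ((1, 2), 0.49)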
@@ -148,11 +198,11 @@ def find_best_answer(sample, start_prob, end_prob, padded_p_len, args):
     else:
         best_answer = ''.join(sample['passages'][best_p_idx]['passage_tokens'][
             best_span[0]:best_span[1] + 1])
-    return best_answer
+    return best_answer, best_span
 
-def validation(inference_program, avg_cost, s_probs, e_probs, feed_order, place,
-               vocab, brc_data, logger, args):
+def validation(inference_program, avg_cost, s_probs, e_probs, match, feed_order,
+               place, dev_count, vocab, brc_data, logger, args):
     """
...
@@ -165,6 +215,8 @@ def validation(inference_program, avg_cost, s_probs, e_probs, feed_order, place,
     # Use test set as validation each pass
     total_loss = 0.0
     count = 0
+    n_batch_cnt = 0
+    n_batch_loss = 0.0
     pred_answers, ref_answers = [], []
     val_feed_list = [
         inference_program.global_block().var(var_name)
...
@@ -172,55 +224,80 @@ def validation(inference_program, avg_cost, s_probs, e_probs, feed_order, place,
     ]
     val_feeder = fluid.DataFeeder(val_feed_list, place)
     pad_id = vocab.get_id(vocab.pad_token)
-    dev_batches = brc_data.gen_mini_batches(
+    dev_reader = lambda: brc_data.gen_mini_batches(
         'dev', args.batch_size, pad_id, shuffle=False)
+    dev_reader = read_multiple(dev_reader, dev_count)
 
-    for batch_id, batch in enumerate(dev_batches, 1):
-        feed_data = prepare_batch_input(batch, args)
+    for batch_id, batch_list in enumerate(dev_reader(), 1):
+        feed_data = batch_reader(batch_list, args)
         val_fetch_outs = parallel_executor.run(
-            feed=val_feeder.feed(feed_data),
-            fetch_list=[avg_cost.name, s_probs.name, e_probs.name],
+            feed=list(val_feeder.feed_parallel(feed_data, dev_count)),
+            fetch_list=[avg_cost.name, s_probs.name, e_probs.name, match.name],
             return_numpy=False)
-        total_loss += np.array(val_fetch_outs[0]).sum()
-        start_probs = LodTensor_Array(val_fetch_outs[1])
-        end_probs = LodTensor_Array(val_fetch_outs[2])
-        count += len(batch['raw_data'])
-        padded_p_len = len(batch['passage_token_ids'][0])
-        for sample, start_prob, end_prob in zip(batch['raw_data'], start_probs,
-                                                end_probs):
-            best_answer = find_best_answer(sample, start_prob, end_prob,
-                                           padded_p_len, args)
-            pred_answers.append({
-                'question_id': sample['question_id'],
-                'question_type': sample['question_type'],
-                'answers': [best_answer],
-                'entity_answers': [[]],
-                'yesno_answers': []
-            })
-            if 'answers' in sample:
-                ref_answers.append({
-                    'question_id': sample['question_id'],
-                    'question_type': sample['question_type'],
-                    'answers': sample['answers'],
-                    'entity_answers': [[]],
-                    'yesno_answers': []
-                })
-    if args.result_dir is not None and args.result_name is not None:
+        total_loss += np.array(val_fetch_outs[0])[0]
+        start_probs_m = LodTensor_Array(val_fetch_outs[1])
+        end_probs_m = LodTensor_Array(val_fetch_outs[2])
+        match_lod = val_fetch_outs[3].lod()
+        count += len(np.array(val_fetch_outs[0]))
+        n_batch_cnt += len(np.array(val_fetch_outs[0]))
+        n_batch_loss += np.array(val_fetch_outs[0]).sum()
+        log_every_n_batch = args.log_interval
+        if log_every_n_batch > 0 and batch_id % log_every_n_batch == 0:
+            logger.info('Average dev loss from batch {} to {} is {}'.format(
+                batch_id - log_every_n_batch + 1, batch_id, "%.10f" % (
+                    n_batch_loss / n_batch_cnt)))
+            n_batch_loss = 0.0
+            n_batch_cnt = 0
+
+        for idx, batch in enumerate(batch_list):
+            #one batch
+            batch_size = len(batch['raw_data'])
+            batch_range = match_lod[0][idx * batch_size:(idx + 1) * batch_size +
+                                       1]
+            batch_lod = [[batch_range[x], batch_range[x + 1]]
+                         for x in range(len(batch_range[:-1]))]
+            start_prob_batch = start_probs_m[idx * batch_size:(idx + 1) *
                                             batch_size]
+            end_prob_batch = end_probs_m[idx * batch_size:(idx + 1) * batch_size]
+            for sample, start_prob_inst, end_prob_inst, inst_range in zip(
+                    batch['raw_data'], start_prob_batch, end_prob_batch,
+                    batch_lod):
+                #one instance
+                inst_lod = match_lod[1][inst_range[0]:inst_range[1] + 1]
+                best_answer, best_span = find_best_answer_for_inst(
+                    sample, start_prob_inst, end_prob_inst, inst_lod)
+                pred = {
+                    'question_id': sample['question_id'],
+                    'question_type': sample['question_type'],
+                    'answers': [best_answer],
+                    'entity_answers': [[]],
+                    'yesno_answers': [best_span]
+                }
+                pred_answers.append(pred)
+                if 'answers' in sample:
+                    ref = {
+                        'question_id': sample['question_id'],
+                        'question_type': sample['question_type'],
+                        'answers': sample['answers'],
+                        'entity_answers': [[]],
+                        'yesno_answers': []
+                    }
+                    ref_answers.append(ref)
+
+    result_dir = args.result_dir
+    result_prefix = args.result_name
+    if result_dir is not None and result_prefix is not None:
         if not os.path.exists(args.result_dir):
             os.makedirs(args.result_dir)
-        result_file = os.path.join(args.result_dir, args.result_name + '.json')
+        result_file = os.path.join(result_dir, result_prefix + '.json')
         with open(result_file, 'w') as fout:
             for pred_answer in pred_answers:
                 fout.write(json.dumps(pred_answer, ensure_ascii=False) + '\n')
-        logger.info('Saving {} results to {}'.format(args.result_name,
+        logger.info('Saving {} results to {}'.format(result_prefix,
                                                      result_file))
 
     ave_loss = 1.0 * total_loss / count
     # compute the bleu and rouge scores if reference answers is provided
     if len(ref_answers) > 0:
         pred_dict, ref_dict = {}, {}
...
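The new validation loop fetches the match output only to read its LoD: the nested offsets in match_lod are what allow the flat start/end probability arrays to be cut back into per-question, per-passage pieces for find_best_answer_for_inst. A small numpy sketch of slicing a flat array by such offsets (toy numbers, independent of Paddle):

import numpy as np

flat = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])   # e.g. start probabilities for all positions
offsets = [0, 2, 6]                               # LoD offsets: piece 0 -> [0, 2), piece 1 -> [2, 6)
pieces = [flat[offsets[i]:offsets[i + 1]] for i in range(len(offsets) - 1)]
print([p.tolist() for p in pieces])               # [[0.1, 0.2], [0.3, 0.4, 0.5, 0.6]]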
@@ -250,6 +327,13 @@ def train(logger, args):
     brc_data.convert_to_ids(vocab)
     logger.info('Initialize the model...')
 
+    if not args.use_gpu:
+        place = fluid.CPUPlace()
+        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
+    else:
+        place = fluid.CUDAPlace(0)
+        dev_count = fluid.core.get_cuda_device_count()
+
     # build model
     main_program = fluid.Program()
     startup_prog = fluid.Program()
...
@@ -257,7 +341,7 @@ def train(logger, args):
     startup_prog.random_seed = args.random_seed
     with fluid.program_guard(main_program, startup_prog):
         with fluid.unique_name.guard():
-            avg_cost, s_probs, e_probs, feed_order = rc_model.rc_model(
+            avg_cost, s_probs, e_probs, match, feed_order = rc_model.rc_model(
                 args.hidden_size, vocab, args)
             # clone from default main program and use it as the validation program
             inference_program = main_program.clone(for_test=True)
...
@@ -314,20 +398,21 @@ def train(logger, args):
             for pass_id in range(1, args.pass_num + 1):
                 pass_start_time = time.time()
                 pad_id = vocab.get_id(vocab.pad_token)
-                train_batches = brc_data.gen_mini_batches(
-                    'train', args.batch_size, pad_id, shuffle=False)
+                train_reader = lambda: brc_data.gen_mini_batches(
+                    'train', args.batch_size, pad_id, shuffle=True)
+                train_reader = read_multiple(train_reader, dev_count)
                 log_every_n_batch, n_batch_loss = args.log_interval, 0
                 total_num, total_loss = 0, 0
-                for batch_id, batch in enumerate(train_batches, 1):
-                    input_data_dict = prepare_batch_input(batch, args)
+                for batch_id, batch_list in enumerate(train_reader(), 1):
+                    feed_data = batch_reader(batch_list, args)
                     fetch_outs = parallel_executor.run(
-                        feed=feeder.feed(input_data_dict),
+                        feed=list(feeder.feed_parallel(feed_data, dev_count)),
                         fetch_list=[avg_cost.name],
                         return_numpy=False)
-                    cost_train = np.array(fetch_outs[0])[0]
-                    total_num += len(batch['raw_data'])
+                    cost_train = np.array(fetch_outs[0]).mean()
+                    total_num += args.batch_size * dev_count
                     n_batch_loss += cost_train
-                    total_loss += cost_train * len(batch['raw_data'])
+                    total_loss += cost_train * args.batch_size * dev_count
 
                     if log_every_n_batch > 0 and batch_id % log_every_n_batch == 0:
                         print_para(main_program, parallel_executor, logger,
                                    args)
...
@@ -337,19 +422,23 @@ def train(logger, args):
                             "%.10f" % (n_batch_loss / log_every_n_batch)))
                         n_batch_loss = 0
                     if args.dev_interval > 0 and batch_id % args.dev_interval == 0:
-                        eval_loss, bleu_rouge = validation(
-                            inference_program, avg_cost, s_probs, e_probs,
-                            feed_order, place, vocab, brc_data, logger, args)
-                        logger.info('Dev eval loss {}'.format(eval_loss))
-                        logger.info('Dev eval result: {}'.format(bleu_rouge))
+                        if brc_data.dev_set is not None:
+                            eval_loss, bleu_rouge = validation(
+                                inference_program, avg_cost, s_probs, e_probs,
+                                match, feed_order, place, dev_count, vocab,
+                                brc_data, logger, args)
+                            logger.info('Dev eval loss {}'.format(eval_loss))
+                            logger.info('Dev eval result: {}'.format(bleu_rouge))
                 pass_end_time = time.time()
 
                 logger.info('Evaluating the model after epoch {}'.format(
                     pass_id))
                 if brc_data.dev_set is not None:
                     eval_loss, bleu_rouge = validation(
-                        inference_program, avg_cost, s_probs, e_probs,
-                        feed_order, place, vocab, brc_data, logger, args)
+                        inference_program, avg_cost, s_probs, e_probs, match,
+                        feed_order, place, dev_count, vocab, brc_data, logger, args)
                     logger.info('Dev eval loss {}'.format(eval_loss))
                     logger.info('Dev eval result: {}'.format(bleu_rouge))
                 else:
...
@@ -389,10 +478,17 @@ def evaluate(logger, args):
     startup_prog.random_seed = args.random_seed
     with fluid.program_guard(main_program, startup_prog):
         with fluid.unique_name.guard():
-            avg_cost, s_probs, e_probs, feed_order = rc_model.rc_model(
+            avg_cost, s_probs, e_probs, match, feed_order = rc_model.rc_model(
                 args.hidden_size, vocab, args)
             # initialize parameters
-            place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
+            if not args.use_gpu:
+                place = fluid.CPUPlace()
+                dev_count = int(
+                    os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
+            else:
+                place = fluid.CUDAPlace(0)
+                dev_count = fluid.core.get_cuda_device_count()
             exe = Executor(place)
             if args.load_dir:
                 logger.info('load from {}'.format(args.load_dir))
...
@@ -402,17 +498,10 @@ def evaluate(logger, args):
                 logger.error('No model file to load ...')
                 return
 
-            # prepare data
-            feed_list = [
-                main_program.global_block().var(var_name)
-                for var_name in feed_order
-            ]
-            feeder = fluid.DataFeeder(feed_list, place)
-
             inference_program = main_program.clone(for_test=True)
             eval_loss, bleu_rouge = validation(
                 inference_program, avg_cost, s_probs, e_probs, feed_order,
-                place, vocab, brc_data, logger, args)
+                place, dev_count, vocab, brc_data, logger, args)
             logger.info('Dev eval loss {}'.format(eval_loss))
             logger.info('Dev eval result: {}'.format(bleu_rouge))
             logger.info('Predicted answers are saved to {}'.format(
...
@@ -438,10 +527,17 @@ def predict(logger, args):
     startup_prog.random_seed = args.random_seed
     with fluid.program_guard(main_program, startup_prog):
         with fluid.unique_name.guard():
-            avg_cost, s_probs, e_probs, feed_order = rc_model.rc_model(
+            avg_cost, s_probs, e_probs, match, feed_order = rc_model.rc_model(
                 args.hidden_size, vocab, args)
             # initialize parameters
-            place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
+            if not args.use_gpu:
+                place = fluid.CPUPlace()
+                dev_count = int(
+                    os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
+            else:
+                place = fluid.CUDAPlace(0)
+                dev_count = fluid.core.get_cuda_device_count()
             exe = Executor(place)
             if args.load_dir:
                 logger.info('load from {}'.format(args.load_dir))
...
@@ -451,17 +547,10 @@ def predict(logger, args):
                 logger.error('No model file to load ...')
                 return
 
-            # prepare data
-            feed_list = [
-                main_program.global_block().var(var_name)
-                for var_name in feed_order
-            ]
-            feeder = fluid.DataFeeder(feed_list, place)
-
             inference_program = main_program.clone(for_test=True)
             eval_loss, bleu_rouge = validation(
-                inference_program, avg_cost, s_probs, e_probs, feed_order,
-                place, vocab, brc_data, logger, args)
+                inference_program, avg_cost, s_probs, e_probs, match,
+                feed_order, place, dev_count, vocab, brc_data, logger, args)
 
 
 def prepare(logger, args):
...
fluid/machine_reading_comprehension/run.sh  View file @ 8997bb63
-export CUDA_VISIBLE_DEVICES=1
+export CUDA_VISIBLE_DEVICES=0
 python run.py \
   --trainset 'data/preprocessed/trainset/search.train.json' \
              'data/preprocessed/trainset/zhidao.train.json' \
...
@@ -11,11 +11,12 @@ python run.py \
   --save_dir ./models \
   --pass_num 10 \
   --learning_rate 0.001 \
-  --batch_size 8 \
+  --batch_size 32 \
   --embed_size 300 \
   --hidden_size 150 \
   --max_p_num 5 \
   --max_p_len 500 \
   --max_q_len 60 \
   --max_a_len 200 \
+  --weight_decay 0.0 \
   --drop_rate 0.2  $@ \