PaddlePaddle / Paddle
Commit 8a521c0b
Authored on July 16, 2018 by fengjiayi
Parent: ebe3b5e7

Remove buggy get_test_program and refine c++ reader demo
Showing 3 changed files with 62 additions and 159 deletions:

python/paddle/fluid/io.py  (+0, -98)
python/paddle/fluid/tests/demo/text_classification/convert_data_to_recordio.py  (+6, -2)
python/paddle/fluid/tests/demo/text_classification/train.py  (+56, -59)
python/paddle/fluid/io.py
@@ -789,101 +789,3 @@ def get_parameter_value_by_name(name, executor, program=None):
         program = default_main_program()
     var = program.global_block().var(name)
     return get_parameter_value(var, executor)
-
-
-def get_test_program(filelist, program=None, startup_program=None):
-    """
-    Transpile current train program to a program to read test dataset
-    if the program is using reader ops like "open_files_op".
-    """
-
-    def _copy_reader_var_(block, var, new_name=None):
-        if new_name == None:
-            new_name = var.name
-        new_var = block.create_var(
-            name=str(new_name), type=core.VarDesc.VarType.READER)
-        new_var.desc.set_shapes(var.desc.shapes())
-        new_var.desc.set_dtypes(var.desc.dtypes())
-        new_var.persistable = True
-        return new_var
-
-    def _get_test_reader_name(train_reader_name):
-        return train_reader_name + "_test"
-
-    def _is_reader_op(op):
-        block = op.block
-        if "Out" in op.output_names:
-            reader_out = block.vars[op.output("Out")[0]]
-            if reader_out.type == core.VarDesc.VarType.READER:
-                return True
-        return False
-
-    if program == None:
-        program = default_main_program()
-    if startup_program == None:
-        startup_program = default_startup_program()
-    startup_block = startup_program.global_block()
-
-    # 1. find out the orignal reader var name
-    startup_reader_op_list = []
-
-    for op in startup_block.ops:
-        if _is_reader_op(op):
-            startup_reader_op_list.append(op)
-
-    if len(startup_reader_op_list) == 0:
-        return program
-
-    root_reader_op = startup_reader_op_list[0]
-    train_test_reader_map = {}
-    # 2. add operators to startup to read open and read test data files
-    for op in startup_reader_op_list:
-        assert (len(op.output("Out")) == 1)
-        train_reader_name = op.output("Out")[0]
-        train_reader = startup_block.vars[train_reader_name]
-        test_reader = _copy_reader_var_(
-            startup_block,
-            train_reader,
-            new_name=_get_test_reader_name(train_reader_name))
-        train_test_reader_map[train_reader.name] = test_reader
-
-        test_op_inputs = {}
-        for name in op.input_names:
-            train_arg_names = op.input(name)
-            test_arg_vars = []
-            for arg_name in train_arg_names:
-                arg_var = train_test_reader_map[
-                    arg_name] if name == "UnderlyingReader" else startup_block.vars[arg_name]
-                test_arg_vars.append(arg_var)
-            test_op_inputs[name] = test_arg_vars
-
-        test_op = startup_block.append_op(
-            type=op.type,
-            inputs=test_op_inputs,
-            outputs={'Out': [test_reader]},
-            attrs=op.attrs)
-        # root reader op's filelist attr for read test files
-        if op.type == root_reader_op.type:
-            test_op.set_attr("file_names", filelist)
-        if op.type == "create_multi_pass_reader":
-            test_op.set_attr("pass_num", 1)
-
-    # 3. rename reader vars in inference program to different name
-    # to avoid read from train data.
-    main_block = program.global_block()
-    for var in main_block.vars.values():
-        if var.type == core.VarDesc.VarType.READER:
-            main_block.rename_var(
-                str(var.name), str(_get_test_reader_name(var.name)))
-
-    for op in main_block.ops:
-        if op.type == root_reader_op.type:
-            test_op.set_attr("file_names", filelist)
-        if op.type == "create_multi_pass_reader":
-            test_op.set_attr("pass_num", 1)
-
-    startup_program.sync_with_cpp()
-    program.sync_with_cpp()
-
-    return program
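With get_test_program gone, a caller that needs a separate evaluation pass builds a second Program directly, which is what the refactored demo below does. A minimal sketch of that pattern, assuming recordio files with the same shapes and dtypes as the demo; the helper name build_reader and the file names are illustrative, not part of the Paddle API:

import paddle.fluid as fluid

def build_reader(is_train):
    # Open the recordio files for this phase (illustrative file names).
    files = ['train.recordio'] if is_train else ['test.recordio']
    reader = fluid.layers.io.open_files(
        filenames=files,
        shapes=[[-1, 1], [-1, 1]],
        lod_levels=[1, 0],
        dtypes=['int64', 'int64'],
        thread_num=1)
    if is_train:
        # Shuffle only the training data.
        reader = fluid.layers.io.shuffle(reader, buffer_size=1000)
    return fluid.layers.io.double_buffer(reader)

train_prog = fluid.Program()
test_prog = fluid.Program()
startup = fluid.Program()

with fluid.program_guard(train_prog, startup):
    with fluid.unique_name.guard():
        data, label = fluid.layers.read_file(build_reader(is_train=True))
        # ... build the model and optimizer on (data, label) ...

with fluid.program_guard(test_prog, startup):
    with fluid.unique_name.guard():
        data, label = fluid.layers.read_file(build_reader(is_train=False))
        # ... define the same model for evaluation; weights are shared
        #     at run time, e.g. via ParallelExecutor(share_vars_from=...) ...

Sharing one startup program and wrapping both definitions in fluid.unique_name.guard() keeps parameter names aligned between the two programs, so the test program reuses the trained weights instead of transpiling the train program as get_test_program tried to do.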
python/paddle/fluid/tests/demo/text_classification/convert_data_to_recordio.py
@@ -31,8 +31,12 @@ def load_vocab(filename):
 # load word dict with paddle inner function
-word_dict = load_vocab(sys.argv[1])
-word_dict["<unk>"] = len(word_dict)
+if len(sys.argv) > 1:
+    word_dict = load_vocab(sys.argv[1])
+    word_dict["<unk>"] = len(word_dict)
+else:
+    word_dict = paddle.dataset.imdb.word_dict()
 print "Dict dim = ", len(word_dict)

 # input text data
 ...
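The change above makes the vocabulary file optional: pass a path as the first argument to use a custom vocabulary, or pass nothing to fall back to the dictionary bundled with paddle.dataset.imdb. A minimal stand-alone sketch of that behaviour, in the demo's Python-2 style; the load_vocab body here is illustrative, the demo's own helper is the one named in the hunk header:

import sys
import paddle

def load_vocab(filename):
    # Illustrative helper: one token per line, id = line number.
    vocab = {}
    with open(filename) as f:
        for idx, line in enumerate(f):
            vocab[line.strip()] = idx
    return vocab

if len(sys.argv) > 1:
    # Custom vocabulary plus an id reserved for out-of-vocabulary words.
    word_dict = load_vocab(sys.argv[1])
    word_dict["<unk>"] = len(word_dict)
else:
    # No argument: use the vocabulary shipped with the IMDB dataset.
    word_dict = paddle.dataset.imdb.word_dict()

print "Dict dim = ", len(word_dict)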
python/paddle/fluid/tests/demo/text_classification/train.py
@@ -19,7 +19,7 @@ import sys
 TRAIN_FILES = ['train.recordio']
 TEST_FILES = ['test.recordio']

-DICT_DIM = 89528
+DICT_DIM = 5147

 # embedding dim
 emb_dim = 128
 ...
@@ -33,33 +33,24 @@ hid_dim2 = 96
 # class num
 class_dim = 2

+# epoch num
+epoch_num = 10
+

-def network_cfg(is_train, pass_num=100):
-    with fluid.unique_name.guard():
-        train_file_obj = fluid.layers.open_files(
-            filenames=TRAIN_FILES,
-            pass_num=pass_num,
-            shapes=[[-1, 1], [-1, 1]],
-            lod_levels=[1, 0],
-            dtypes=['int64', 'int64'],
-            thread_num=1)
-
-        test_file_obj = fluid.layers.open_files(
-            filenames=TEST_FILES,
-            pass_num=1,
-            shapes=[[-1, 1], [-1, 1]],
-            lod_levels=[1, 0],
-            dtypes=['int64', 'int64'],
-            thread_num=1)
-
-        if is_train:
-            file_obj = fluid.layers.shuffle(train_file_obj, buffer_size=1000)
-        else:
-            file_obj = test_file_obj
-
-        file_obj = fluid.layers.double_buffer(
-            file_obj,
-            name="train_double_buffer" if is_train else 'test_double_buffer')
+def build_program(is_train):
+    file_obj_handle = fluid.layers.io.open_files(
+        filenames=TRAIN_FILES if is_train else TEST_FILES,
+        shapes=[[-1, 1], [-1, 1]],
+        lod_levels=[1, 0],
+        dtypes=['int64', 'int64'],
+        thread_num=1)
+    if is_train:
+        file_obj = fluid.layers.io.shuffle(file_obj_handle, buffer_size=1000)
+    else:
+        file_obj = file_obj_handle
+    file_obj = fluid.layers.io.double_buffer(file_obj)

+    with fluid.unique_name.guard():
         data, label = fluid.layers.read_file(file_obj)
 ...
@@ -90,58 +81,64 @@ def network_cfg(is_train, pass_num=100):
         if is_train:
             # SGD optimizer
-            sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=0.01)
+            sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=0.001)
             sgd_optimizer.minimize(avg_cost)

         return {
             'loss': avg_cost,
             'log': [avg_cost, acc],
-            'file': train_file_obj if is_train else test_file_obj
+            'file': file_obj_handle
         }


 def main():
     train = fluid.Program()
     startup = fluid.Program()
+    test = fluid.Program()

     with fluid.program_guard(train, startup):
-        train_args = network_cfg(is_train=True)
+        train_args = build_program(is_train=True)

-    test = fluid.Program()
-
-    with fluid.program_guard(test, fluid.Program()):
-        test_args = network_cfg(is_train=False)
+    with fluid.program_guard(test, startup):
+        test_args = build_program(is_train=False)

+    use_cuda = fluid.core.is_compiled_with_cuda()
     # startup
-    place = fluid.CUDAPlace(0)
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place=place)
     exe.run(startup)

     train_exe = fluid.ParallelExecutor(
-        use_cuda=True, loss_name=train_args['loss'].name, main_program=train)
+        use_cuda=use_cuda,
+        loss_name=train_args['loss'].name,
+        main_program=train)
+    test_exe = fluid.ParallelExecutor(
+        use_cuda=use_cuda, main_program=test, share_vars_from=train_exe)

     fetch_var_list = [var.name for var in train_args['log']]
-    for i in xrange(sys.maxint):
-        result = map(numpy.array,
-                     train_exe.run(fetch_list=fetch_var_list
-                                   if i % 1000 == 0 else []))
-        if len(result) != 0:
-            print 'Train: ', result
-
-        if i % 1000 == 0:
-            test_exe = fluid.ParallelExecutor(
-                use_cuda=True, main_program=test, share_vars_from=train_exe)
-            loss = []
-            acc = []
-            try:
-                while True:
-                    loss_np, acc_np = map(
-                        numpy.array, test_exe.run(fetch_list=fetch_var_list))
-                    loss.append(loss_np[0])
-                    acc.append(acc_np[0])
-            except:
-                test_args['file'].reset()
-                print 'TEST: ', numpy.mean(loss), numpy.mean(acc)
+    for epoch_id in range(epoch_num):
+        # train
+        try:
+            batch_id = 0
+            while True:
+                result = map(numpy.array,
+                             train_exe.run(fetch_list=fetch_var_list
+                                           if batch_id % 10 == 0 else []))
+                if len(result) != 0:
+                    print 'Train loss: ', result
+                batch_id += 1
+        except fluid.core.EOFException:
+            print 'End of epoch', epoch_id
+            train_args['file'].reset()
+
+        # test
+        loss = []
+        acc = []
+        try:
+            while True:
+                loss_np, acc_np = map(numpy.array,
+                                      test_exe.run(fetch_list=fetch_var_list))
+                loss.append(loss_np[0])
+                acc.append(acc_np[0])
+        except:
+            test_args['file'].reset()
+            print 'TEST: ', numpy.mean(loss), numpy.mean(acc)


 if __name__ == '__main__':
 ...
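The refactored loop no longer spins on xrange(sys.maxint); each reader is drained until it raises fluid.core.EOFException and is then reset() for the next pass. A condensed sketch of that pattern, assuming train_exe, test_exe, train_args, test_args, fetch_var_list, epoch_num and numpy are set up as in the diff above:

for epoch_id in range(epoch_num):
    # Train: run until the reader signals the end of the pass.
    try:
        while True:
            train_exe.run(fetch_list=[])
    except fluid.core.EOFException:
        train_args['file'].reset()  # rewind the train reader

    # Test: evaluate the whole test set, then rewind its reader too.
    loss, acc = [], []
    try:
        while True:
            loss_np, acc_np = map(numpy.array,
                                  test_exe.run(fetch_list=fetch_var_list))
            loss.append(loss_np[0])
            acc.append(acc_np[0])
    except fluid.core.EOFException:
        test_args['file'].reset()
    print 'TEST: ', numpy.mean(loss), numpy.mean(acc)

The demo itself catches a bare except around the test loop; catching fluid.core.EOFException explicitly, as its training loop does, is the equivalent intent.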