Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
d3c755df
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d3c755df
编写于
3月 02, 2017
作者:
H
hedaoyuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine code
上级
0a33f170
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
39 additions
and
56 deletions
+39
-56
demo/sentiment/train_v2.py
demo/sentiment/train_v2.py
+39
-56
未找到文件。
demo/sentiment/train_v2.py
浏览文件 @
d3c755df
import
sys
from
os.path
import
join
as
join_path
import
paddle.trainer_config_helpers.attrs
as
attrs
from
paddle.trainer_config_helpers.poolings
import
MaxPooling
...
...
@@ -188,88 +189,69 @@ def stacked_lstm_net(input_dim,
return
cost
def data_reader():
    """Yield (word_slot, label) samples from the pre-processed IMDB train set.

    Builds a word -> id dictionary from ./data/pre-imdb/dict.txt — the id of
    a word is its line number, and the word itself is the text before the
    first tab on the line — then streams ./data/pre-imdb/train_part_000,
    where every line has the form "<int label>\t\t<space separated words>".

    Yields:
        tuple(list[int], int): ids of the in-vocabulary words of one comment
        (out-of-vocabulary words are silently skipped) and the comment's
        integer class label.
    """
    data_dir = "./data/pre-imdb"
    train_file = join_path(data_dir, "train_part_000")
    dict_file = join_path(data_dir, "dict.txt")
    # NOTE: the original also built a test_part_000 path here but never
    # used it; the unused local has been removed.

    with open(dict_file, 'r') as fdict, open(train_file, 'r') as fdata:
        dictionary = dict()
        # Word id == line number in the dictionary file.
        for i, line in enumerate(fdict):
            dictionary[line.split('\t')[0]] = i
        # The original enumerated fdata into an unused line_count; a plain
        # iteration is equivalent.
        for line in fdata:
            label, comment = line.strip().split('\t\t')
            label = int(label)
            words = comment.split()
            word_slot = [dictionary[w] for w in words if w in dictionary]
            yield (word_slot, label)
def test_reader():
    """Yield (word_slot, label) samples from the pre-processed IMDB test set.

    Builds a word -> id dictionary from ./data/pre-imdb/dict.txt — the id of
    a word is its line number, and the word itself is the text before the
    first tab on the line — then streams ./data/pre-imdb/test_part_000,
    where every line has the form "<int label>\t\t<space separated words>".

    Yields:
        tuple(list[int], int): ids of the in-vocabulary words of one comment
        (out-of-vocabulary words are silently skipped) and the comment's
        integer class label.
    """
    data_dir = "./data/pre-imdb"
    test_file = join_path(data_dir, "test_part_000")
    dict_file = join_path(data_dir, "dict.txt")
    # NOTE: the original also built a train_part_000 path here but never
    # used it; the unused local has been removed.

    with open(dict_file, 'r') as fdict, open(test_file, 'r') as ftest:
        dictionary = dict()
        # Word id == line number in the dictionary file.
        for i, line in enumerate(fdict):
            dictionary[line.split('\t')[0]] = i
        # The original enumerated ftest into an unused line_count; a plain
        # iteration is equivalent.
        for line in ftest:
            label, comment = line.strip().split('\t\t')
            label = int(label)
            words = comment.split()
            word_slot = [dictionary[w] for w in words if w in dictionary]
            yield (word_slot, label)
def data_reader(data_file, dict_file):
    """Create a reader over a pre-processed IMDB data file.

    Args:
        data_file: path of a data file whose lines have the form
            "<int label>\t\t<space separated comment words>".
        dict_file: path of the vocabulary file; the id of a word is its line
            number, and the word itself is the text before the first tab.

    Returns:
        A zero-argument callable. Each call opens both files afresh (so the
        returned reader can be iterated multiple times) and yields
        (word_slot, label) tuples, where word_slot lists the ids of the
        in-vocabulary words of one comment (out-of-vocabulary words are
        silently skipped) and label is the comment's integer class label.
    """

    def reader():
        with open(dict_file, 'r') as fdict, open(data_file, 'r') as fdata:
            dictionary = dict()
            # Word id == line number in the dictionary file.
            for i, line in enumerate(fdict):
                dictionary[line.split('\t')[0]] = i
            # The original enumerated fdata into an unused line_count; a
            # plain iteration is equivalent.
            for line in fdata:
                label, comment = line.strip().split('\t\t')
                label = int(label)
                words = comment.split()
                word_slot = [dictionary[w] for w in words if w in dictionary]
                yield (word_slot, label)

    return reader
if
__name__
==
'__main__'
:
data_dir
=
"./data/pre-imdb"
train_list
=
"train.list"
test_list
=
"test.list"
dict_file
=
"dict.txt"
dict_dim
=
len
(
open
(
join_path
(
data_dir
,
"dict.txt"
)).
readlines
())
class_dim
=
len
(
open
(
join_path
(
data_dir
,
'labels.list'
)).
readlines
())
is_predict
=
False
# data file
train_file
=
"./data/pre-imdb/train_part_000"
test_file
=
"./data/pre-imdb/test_part_000"
dict_file
=
"./data/pre-imdb/dict.txt"
labels
=
"./data/pre-imdb/labels.list"
# init
paddle
.
init
(
use_gpu
=
True
,
trainer_count
=
4
)
# network config
# cost = convolution_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
cost
=
stacked_lstm_net
(
dict_dim
,
class_dim
=
class_dim
,
stacked_num
=
3
,
is_predict
=
is_predict
)
dict_dim
=
len
(
open
(
dict_file
).
readlines
())
class_dim
=
len
(
open
(
labels
).
readlines
())
# Please choose the way to build the network
# by uncommenting the corresponding line.
cost
=
convolution_net
(
dict_dim
,
class_dim
=
class_dim
)
# cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
# create parameters
parameters
=
paddle
.
parameters
.
create
(
cost
)
# create optimizer
adam_optimizer
=
paddle
.
optimizer
.
Adam
(
learning_rate
=
2e-3
,
regularization
=
paddle
.
optimizer
.
L2Regularization
(
rate
=
8e-4
),
model_average
=
paddle
.
optimizer
.
ModelAverage
(
average_window
=
0.5
))
# End batch and end pass event handler
def
event_handler
(
event
):
if
isinstance
(
event
,
paddle
.
event
.
EndIteration
):
if
event
.
batch_id
%
100
==
0
:
print
"Pass %d, Batch %d, Cost %f, %s"
%
(
print
"
\n
Pass %d, Batch %d, Cost %f, %s"
%
(
event
.
pass_id
,
event
.
batch_id
,
event
.
cost
,
event
.
metrics
)
else
:
sys
.
stdout
.
write
(
'.'
)
sys
.
stdout
.
flush
()
if
isinstance
(
event
,
paddle
.
event
.
EndPass
):
result
=
trainer
.
test
(
reader
=
paddle
.
reader
.
batched
(
test_reader
,
batch_size
=
128
),
data_reader
(
test_file
,
dict_file
)
,
batch_size
=
128
),
reader_dict
=
{
'word'
:
0
,
'label'
:
1
})
print
"Test with Pass %d, %s"
%
(
event
.
pass_id
,
result
.
metrics
)
print
"
\n
Test with Pass %d, %s"
%
(
event
.
pass_id
,
result
.
metrics
)
# create trainer
trainer
=
paddle
.
trainer
.
SGD
(
cost
=
cost
,
parameters
=
parameters
,
update_equation
=
adam_optimizer
)
...
...
@@ -277,7 +259,8 @@ if __name__ == '__main__':
trainer
.
train
(
reader
=
paddle
.
reader
.
batched
(
paddle
.
reader
.
shuffle
(
data_reader
,
buf_size
=
4096
),
batch_size
=
128
),
data_reader
(
train_file
,
dict_file
),
buf_size
=
4096
),
batch_size
=
128
),
event_handler
=
event_handler
,
reader_dict
=
{
'word'
:
0
,
'label'
:
1
},
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录