Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
4a265b52
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4a265b52
编写于
3月 02, 2017
作者:
H
hedaoyuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Use reader in dataset imdb.py
上级
d3c755df
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
14 addition
and
29 deletion
+14
-29
demo/sentiment/train_v2.py
demo/sentiment/train_v2.py
+9
-29
python/paddle/v2/dataset/imdb.py
python/paddle/v2/dataset/imdb.py
+5
-0
未找到文件。
demo/sentiment/train_v2.py
浏览文件 @
4a265b52
...
...
@@ -2,10 +2,11 @@ import sys
from
os.path
import
join
as
join_path
import
paddle.trainer_config_helpers.attrs
as
attrs
from
paddle.trainer_config_helpers.poolings
import
MaxPooling
import
paddle.v2
as
paddle
import
paddle.v2.layer
as
layer
import
paddle.v2.activation
as
activation
import
paddle.v2.data_type
as
data_type
import
paddle.v2.dataset.imdb
as
imdb
import
paddle.v2
as
paddle
def
sequence_conv_pool
(
input
,
...
...
@@ -189,36 +190,15 @@ def stacked_lstm_net(input_dim,
return
cost
def
data_reader
(
data_file
,
dict_file
):
def
reader
():
with
open
(
dict_file
,
'r'
)
as
fdict
,
open
(
data_file
,
'r'
)
as
fdata
:
dictionary
=
dict
()
for
i
,
line
in
enumerate
(
fdict
):
dictionary
[
line
.
split
(
'
\t
'
)[
0
]]
=
i
for
line_count
,
line
in
enumerate
(
fdata
):
label
,
comment
=
line
.
strip
().
split
(
'
\t\t
'
)
label
=
int
(
label
)
words
=
comment
.
split
()
word_slot
=
[
dictionary
[
w
]
for
w
in
words
if
w
in
dictionary
]
yield
(
word_slot
,
label
)
return
reader
if
__name__
==
'__main__'
:
# data file
train_file
=
"./data/pre-imdb/train_part_000"
test_file
=
"./data/pre-imdb/test_part_000"
dict_file
=
"./data/pre-imdb/dict.txt"
labels
=
"./data/pre-imdb/labels.list"
# init
paddle
.
init
(
use_gpu
=
True
,
trainer_count
=
4
)
# network config
dict_dim
=
len
(
open
(
dict_file
).
readlines
())
class_dim
=
len
(
open
(
labels
).
readlines
())
print
'load dictionary...'
word_dict
=
imdb
.
word_dict
()
dict_dim
=
len
(
word_dict
)
class_dim
=
2
# Please choose the way to build the network
# by uncommenting the corresponding line.
...
...
@@ -246,7 +226,7 @@ if __name__ == '__main__':
if
isinstance
(
event
,
paddle
.
event
.
EndPass
):
result
=
trainer
.
test
(
reader
=
paddle
.
reader
.
batched
(
data_reader
(
test_file
,
dict_file
),
batch_size
=
128
),
lambda
:
imdb
.
test
(
word_dict
),
batch_size
=
128
),
reader_dict
=
{
'word'
:
0
,
'label'
:
1
})
print
"
\n
Test with Pass %d, %s"
%
(
event
.
pass_id
,
result
.
metrics
)
...
...
@@ -259,8 +239,8 @@ if __name__ == '__main__':
trainer
.
train
(
reader
=
paddle
.
reader
.
batched
(
paddle
.
reader
.
shuffle
(
data_reader
(
train_file
,
dict_file
),
buf_size
=
4096
),
batch_size
=
1
28
),
lambda
:
imdb
.
train
(
word_dict
),
buf_size
=
1000
),
batch_size
=
1
00
),
event_handler
=
event_handler
,
reader_dict
=
{
'word'
:
0
,
'label'
:
1
},
...
...
python/paddle/v2/dataset/imdb.py
浏览文件 @
4a265b52
...
...
@@ -118,3 +118,8 @@ def test(word_idx):
return
reader_creator
(
re
.
compile
(
"aclImdb/test/pos/.*\.txt$"
),
re
.
compile
(
"aclImdb/test/neg/.*\.txt$"
),
word_idx
,
1000
)
def
word_dict
():
return
build_dict
(
re
.
compile
(
"aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"
),
150
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录