Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)

Commit 812e21f3, authored Feb 27, 2017 by wen-bo-yang
Parent: 55d19fc4

    add cross reading sample files and fix bugs
Showing 3 changed files with 30 additions and 22 deletions (+30 -22):

    paddle/setup.py.in                      +1   -1
    paddle/v2/dataset/config.py             +0   -8
    python/paddle/v2/dataset/sentiment.py   +29  -13
paddle/setup.py.in

@@ -72,7 +72,7 @@ setup(name="py_paddle",
       packages=['py_paddle'],
       include_dirs = include_dirs,
       install_requires = [
-        'nltk',
+        'nltk>=3.2.2',
         'numpy>=1.8.0', # The numpy is required.
         'protobuf>=3.0.0' # The paddle protobuf version
       ],
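Note: the new floor pin can also be checked at runtime. A minimal sketch using setuptools' pkg_resources (not part of this commit; the requirement string is the one added above):

    # Sketch: confirm the installed NLTK satisfies the new pin.
    import pkg_resources

    try:
        pkg_resources.require('nltk>=3.2.2')
        print('nltk version is OK')
    except (pkg_resources.VersionConflict, pkg_resources.DistributionNotFound) as e:
        print('nltk requirement not met: %s' % e)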
paddle/v2/dataset/config.py (deleted, mode 100644 → 0)

-import os
-
-__all__ = ['DATA_HOME']
-
-DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
-
-if not os.path.exists(DATA_HOME):
-    os.makedirs(DATA_HOME)
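The deleted module only resolved a per-user cache directory and created it at import time; presumably an equivalent config module lives alongside the renamed sentiment.py under python/paddle/v2/dataset/, since the new code still does `from config import DATA_HOME`. For reference, a sketch of the same behavior written race-free (exist_ok requires Python 3; the original targeted Python 2, hence its exists() check):

    # Sketch reproducing the removed module's behavior (not part of this commit).
    import os

    DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')

    # exist_ok avoids the check-then-create race in the original.
    os.makedirs(DATA_HOME, exist_ok=True)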
paddle/v2/dataset/sentiment.py → python/paddle/v2/dataset/sentiment.py
@@ -20,9 +20,9 @@ The script fetch and preprocess movie_reviews data set
 that provided by NLTK
 """
 import nltk
 import numpy as np
+from itertools import chain
 from nltk.corpus import movie_reviews
 from config import DATA_HOME
@@ -50,9 +50,10 @@ def download_data_if_not_yet():
     except LookupError:
         print "Downloading movie_reviews data set, please wait....."
         nltk.download('movie_reviews', download_dir=DATA_HOME)
-        print "Download data set success......"
+        # make sure that nltk can find the data
+        nltk.data.path.append(DATA_HOME)
+        print "Download data set success....."
         print "Path is " + nltk.data.find('corpora/movie_reviews').path


 def get_word_dict():
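The bug being fixed: nltk.download() fetches the corpus into DATA_HOME, but NLTK only searches its default paths, so the subsequent nltk.data.find() lookup would still fail. Appending DATA_HOME to nltk.data.path makes the fresh download visible. A self-contained sketch of the same pattern (Python 3 print syntax; the DATA_HOME value is illustrative):

    # Sketch: make NLTK find a corpus downloaded to a custom directory.
    import os
    import nltk

    DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')

    try:
        nltk.data.find('corpora/movie_reviews')
    except LookupError:
        nltk.download('movie_reviews', download_dir=DATA_HOME)
        # Without this, find() would still raise LookupError, because
        # DATA_HOME is not on NLTK's default search path.
        nltk.data.path.append(DATA_HOME)

    print(nltk.data.find('corpora/movie_reviews').path)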
@@ -67,24 +68,39 @@ def get_word_dict():
     words_sort_list = words_freq.items()
     words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
     for index, word in enumerate(words_sort_list):
-        words_freq_sorted.append(word[0])
+        words_freq_sorted.append((word[0], index + 1))
     return words_freq_sorted


+def sort_files():
+    """
+    Sorted the sample for cross reading the sample
+    :return: files_list
+    """
+    files_list = list()
+    download_data_if_not_yet()
+    neg_file_list = movie_reviews.fileids('neg')
+    pos_file_list = movie_reviews.fileids('pos')
+    files_list = list(chain.from_iterable(zip(neg_file_list, pos_file_list)))
+    return files_list
+
+
 def load_sentiment_data():
     """
     Load the data set
     :return: data_set
     """
-    label_dict = get_label_dict()
     data_set = list()
     download_data_if_not_yet()
-    words_freq = nltk.FreqDist(w.lower() for w in movie_reviews.words())
-    data_set = [([words_freq[word.lower()]
-                  for word in movie_reviews.words(fileid)],
-                 label_dict[category])
-                for category in movie_reviews.categories()
-                for fileid in movie_reviews.fileids(category)]
+    words_ids = dict(get_word_dict())
+    for sample_file in sort_files():
+        words_list = list()
+        category = 0 if 'neg' in sample_file else 1
+        for word in movie_reviews.words(sample_file):
+            words_list.append(words_ids[word.lower()])
+        data_set.append((words_list, category))
     return data_set
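Two changes cooperate here. get_word_dict() now returns (word, rank) pairs, so dict(get_word_dict()) in load_sentiment_data() yields a word-to-id mapping with ids starting at 1. And the new sort_files() interleaves negative and positive file ids, so a sequential reader alternates between classes (the "cross reading" of the commit message). A toy sketch of both idioms (file names and word pairs are made up; note that zip() truncates to the shorter list, which is harmless here since movie_reviews has 1000 files per class):

    # Sketch of the cross-reading and word-id idioms on toy data.
    from itertools import chain

    neg = ['neg/a.txt', 'neg/b.txt']
    pos = ['pos/x.txt', 'pos/y.txt']

    # zip() pairs the i-th neg file with the i-th pos file;
    # chain.from_iterable() flattens the pairs into one alternating list.
    print(list(chain.from_iterable(zip(neg, pos))))
    # ['neg/a.txt', 'pos/x.txt', 'neg/b.txt', 'pos/y.txt']

    # (word, rank) pairs, as get_word_dict() now returns, become a lookup dict.
    words_ids = dict([('the', 1), ('a', 2), ('of', 3)])
    print(words_ids['of'])  # 3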
@@ -98,9 +114,9 @@ def reader_creator(data):
     train data set or test data set
     """
     for each in data:
-        sentences = np.array(each[0], dtype=np.int32)
-        labels = np.array(each[1], dtype=np.int8)
-        yield sentences, labels
+        list_of_int = np.array(each[0], dtype=np.int32)
+        label = each[1]
+        yield list_of_int, label


 def train():
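After this change the reader yields a numpy int32 array of word ids together with a plain Python int label, rather than wrapping the label in an int8 array. A minimal sketch of the resulting behavior (the sample data is made up; Python 3 print syntax):

    # Sketch: what a reader built this way yields per sample.
    import numpy as np

    def reader_creator(data):
        for each in data:
            list_of_int = np.array(each[0], dtype=np.int32)
            label = each[1]
            yield list_of_int, label

    samples = [([1, 5, 9], 0), ([2, 4], 1)]
    for ids, label in reader_creator(samples):
        print(ids.dtype, ids.shape, label)
    # int32 (3,) 0
    # int32 (2,) 1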