Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
e6ae1e4f
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e6ae1e4f
编写于
8月 08, 2018
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Replace the dependency of paddle.v2 dataset
上级
6abe819f
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
29 addition
and
25 deletion
+29
-25
python/paddle/dataset/cifar.py
python/paddle/dataset/cifar.py
+1
-1
python/paddle/dataset/wmt14.py
python/paddle/dataset/wmt14.py
+6
-4
python/paddle/dataset/wmt16.py
python/paddle/dataset/wmt16.py
+5
-4
python/paddle/fluid/tests/unittests/test_data_balance.py
python/paddle/fluid/tests/unittests/test_data_balance.py
+1
-1
python/paddle/fluid/tests/unittests/test_preprocessor.py
python/paddle/fluid/tests/unittests/test_preprocessor.py
+2
-2
python/paddle/fluid/tests/unittests/test_profiler.py
python/paddle/fluid/tests/unittests/test_profiler.py
+1
-1
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
+10
-9
python/paddle/fluid/tests/unittests/test_reader_reset.py
python/paddle/fluid/tests/unittests/test_reader_reset.py
+1
-1
python/paddle/fluid/tests/unittests/test_recordio_reader.py
python/paddle/fluid/tests/unittests/test_recordio_reader.py
+2
-2
未找到文件。
python/paddle/dataset/cifar.py
浏览文件 @
e6ae1e4f
...
...
@@ -53,7 +53,7 @@ def reader_creator(filename, sub_name, cycle=False):
yield
(
sample
/
255.0
).
astype
(
numpy
.
float32
),
int
(
label
)
def
reader
():
with
tarfile
.
open
(
filename
,
mode
=
'r
b
'
)
as
f
:
with
tarfile
.
open
(
filename
,
mode
=
'r'
)
as
f
:
names
=
(
each_item
.
name
for
each_item
in
f
if
sub_name
in
each_item
.
name
)
...
...
python/paddle/dataset/wmt14.py
浏览文件 @
e6ae1e4f
...
...
@@ -19,10 +19,12 @@ http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz and
parse training set and test set into paddle reader creators.
"""
import
six
import
tarfile
import
gzip
import
paddle.dataset.common
import
paddle.fluid.compat
as
cpt
__all__
=
[
'train'
,
...
...
@@ -40,8 +42,8 @@ URL_TRAIN = ('http://paddlepaddle.cdn.bcebos.com/demo/'
'wmt_shrinked_data/wmt14.tgz'
)
MD5_TRAIN
=
'0791583d57d5beb693b9414c5b36798c'
# BLEU of this trained model is 26.92
URL_MODEL
=
'http://paddle
paddle.bj.bcebos.com/demo/wmt_14/wmt14_model.tar.
gz'
MD5_MODEL
=
'0
cb4a5366189b6acba876491c8724fa3
'
URL_MODEL
=
'http://paddle
models.bj.bcebos.com/wmt/wmt14.t
gz'
MD5_MODEL
=
'0
791583d57d5beb693b9414c5b36798c
'
START
=
"<s>"
END
=
"<e>"
...
...
@@ -54,7 +56,7 @@ def __read_to_dict(tar_file, dict_size):
out_dict
=
dict
()
for
line_count
,
line
in
enumerate
(
fd
):
if
line_count
<
size
:
out_dict
[
line
.
strip
(
)]
=
line_count
out_dict
[
cpt
.
to_literal_str
(
line
.
strip
()
)]
=
line_count
else
:
break
return
out_dict
...
...
@@ -85,7 +87,7 @@ def reader_creator(tar_file, file_name, dict_size):
]
for
name
in
names
:
for
line
in
f
.
extractfile
(
name
):
line_split
=
line
.
strip
().
split
(
'
\t
'
)
line_split
=
line
.
strip
().
split
(
six
.
b
(
'
\t
'
)
)
if
len
(
line_split
)
!=
2
:
continue
src_seq
=
line_split
[
0
]
# one source sequence
...
...
python/paddle/dataset/wmt16.py
浏览文件 @
e6ae1e4f
...
...
@@ -35,6 +35,7 @@ import gzip
from
collections
import
defaultdict
import
paddle.dataset.common
import
paddle.fluid.compat
as
cpt
__all__
=
[
"train"
,
...
...
@@ -82,16 +83,16 @@ def __load_dict(tar_file, dict_size, lang, reverse=False):
dict_path
=
os
.
path
.
join
(
paddle
.
dataset
.
common
.
DATA_HOME
,
"wmt16/%s_%d.dict"
%
(
lang
,
dict_size
))
if
not
os
.
path
.
exists
(
dict_path
)
or
(
len
(
open
(
dict_path
,
"r"
).
readlines
())
!=
dict_size
):
len
(
open
(
dict_path
,
"r
b
"
).
readlines
())
!=
dict_size
):
__build_dict
(
tar_file
,
dict_size
,
dict_path
,
lang
)
word_dict
=
{}
with
open
(
dict_path
,
"r"
)
as
fdict
:
with
open
(
dict_path
,
"r
b
"
)
as
fdict
:
for
idx
,
line
in
enumerate
(
fdict
):
if
reverse
:
word_dict
[
idx
]
=
line
.
strip
(
)
word_dict
[
idx
]
=
cpt
.
to_literal_str
(
line
.
strip
()
)
else
:
word_dict
[
line
.
strip
(
)]
=
idx
word_dict
[
cpt
.
to_literal_str
(
line
.
strip
()
)]
=
idx
return
word_dict
...
...
python/paddle/fluid/tests/unittests/test_data_balance.py
浏览文件 @
e6ae1e4f
...
...
@@ -14,7 +14,7 @@
import
unittest
import
paddle.fluid
as
fluid
import
paddle
as
paddle
import
paddle
import
numpy
as
np
...
...
python/paddle/fluid/tests/unittests/test_preprocessor.py
浏览文件 @
e6ae1e4f
...
...
@@ -15,9 +15,9 @@
import
unittest
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
import
paddle.v2
as
paddle
import
paddle.v2.dataset.mnist
as
mnist
import
paddle.dataset.mnist
as
mnist
class
TestPreprocessor
(
unittest
.
TestCase
):
...
...
python/paddle/fluid/tests/unittests/test_profiler.py
浏览文件 @
e6ae1e4f
...
...
@@ -93,7 +93,7 @@ class TestProfiler(unittest.TestCase):
"profiler is enabled only with GPU"
)
def
test_all_profiler
(
self
):
self
.
net_profiler
(
'All'
,
'/tmp/profile_out'
)
with
open
(
'/tmp/profile_out'
,
'r'
)
as
f
:
with
open
(
'/tmp/profile_out'
,
'r
b
'
)
as
f
:
self
.
assertGreater
(
len
(
f
.
read
()),
0
)
...
...
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
浏览文件 @
e6ae1e4f
...
...
@@ -14,6 +14,7 @@
import
unittest
import
paddle.fluid.core
as
core
import
paddle.fluid.compat
as
cpt
from
paddle.fluid.framework
import
Program
...
...
@@ -108,7 +109,7 @@ class TestVarDesc(unittest.TestCase):
def
test_shape
(
self
):
program_desc
=
core
.
ProgramDesc
()
block
=
program_desc
.
block
(
0
)
var
=
block
.
var
(
'my_var'
)
var
=
block
.
var
(
cpt
.
to_bytes
(
'my_var'
)
)
var
.
set_type
(
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
)
src_shape
=
[
3
,
2
,
10
,
8
]
var
.
set_shape
(
src_shape
)
...
...
@@ -119,7 +120,7 @@ class TestVarDesc(unittest.TestCase):
def
test_multiple_shape
(
self
):
program_desc
=
core
.
ProgramDesc
()
block
=
program_desc
.
block
(
0
)
var
=
block
.
var
(
'my_reader'
)
var
=
block
.
var
(
cpt
.
to_bytes
(
'my_reader'
)
)
var
.
set_type
(
core
.
VarDesc
.
VarType
.
READER
)
src_shapes
=
[[
2
,
3
,
3
],
[
4
,
5
],
[
6
,
7
,
8
,
9
]]
var
.
set_shapes
(
src_shapes
)
...
...
@@ -130,7 +131,7 @@ class TestVarDesc(unittest.TestCase):
def
test_dtype
(
self
):
program_desc
=
core
.
ProgramDesc
()
block
=
program_desc
.
block
(
0
)
var
=
block
.
var
(
'my_var'
)
var
=
block
.
var
(
cpt
.
to_bytes
(
'my_var'
)
)
var
.
set_type
(
core
.
VarDesc
.
VarType
.
LOD_TENSOR
)
var
.
set_dtype
(
core
.
VarDesc
.
VarType
.
INT32
)
self
.
assertEqual
(
core
.
VarDesc
.
VarType
.
INT32
,
var
.
dtype
())
...
...
@@ -139,7 +140,7 @@ class TestVarDesc(unittest.TestCase):
def
test_multiple_dtype
(
self
):
program_desc
=
core
.
ProgramDesc
()
block
=
program_desc
.
block
(
0
)
var
=
block
.
var
(
'my_reader'
)
var
=
block
.
var
(
cpt
.
to_bytes
(
'my_reader'
)
)
var
.
set_type
(
core
.
VarDesc
.
VarType
.
READER
)
src_types
=
[
core
.
VarDesc
.
VarType
.
INT32
,
core
.
VarDesc
.
VarType
.
FP64
,
...
...
@@ -152,7 +153,7 @@ class TestVarDesc(unittest.TestCase):
def
test_multiple_lod_level
(
self
):
program_desc
=
core
.
ProgramDesc
()
block
=
program_desc
.
block
(
0
)
var
=
block
.
var
(
'my_reader'
)
var
=
block
.
var
(
cpt
.
to_bytes
(
'my_reader'
)
)
var
.
set_type
(
core
.
VarDesc
.
VarType
.
READER
)
src_types
=
[
3
,
1
,
2
]
var
.
set_lod_levels
(
src_types
)
...
...
@@ -166,12 +167,12 @@ class TestBlockDesc(unittest.TestCase):
self
.
assertIsNotNone
(
program_desc
)
block
=
program_desc
.
block
(
0
)
self
.
assertIsNotNone
(
block
)
var1
=
block
.
var
(
"var1"
)
var2
=
block
.
var
(
"var2"
)
var3
=
block
.
var
(
"var3"
)
var1
=
block
.
var
(
cpt
.
to_bytes
(
"var1"
)
)
var2
=
block
.
var
(
cpt
.
to_bytes
(
"var2"
)
)
var3
=
block
.
var
(
cpt
.
to_bytes
(
"var3"
)
)
all_vars
=
block
.
all_vars
()
self
.
assertEqual
(
set
(
all_vars
),
{
var1
,
var2
,
var3
})
var2_re
=
block
.
find_var
(
"var2"
)
var2_re
=
block
.
find_var
(
cpt
.
to_bytes
(
"var2"
)
)
self
.
assertEqual
(
var2_re
,
var2
)
def
test_add_op
(
self
):
...
...
python/paddle/fluid/tests/unittests/test_reader_reset.py
浏览文件 @
e6ae1e4f
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
import
paddle.fluid
as
fluid
import
paddle
as
paddle
import
paddle
import
numpy
as
np
import
unittest
...
...
python/paddle/fluid/tests/unittests/test_recordio_reader.py
浏览文件 @
e6ae1e4f
...
...
@@ -15,8 +15,8 @@
import
unittest
import
paddle.fluid
as
fluid
import
paddle
.v2
as
paddle
import
paddle.
v2.
dataset.mnist
as
mnist
import
paddle
import
paddle.dataset.mnist
as
mnist
class
TestRecordIO
(
unittest
.
TestCase
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录