Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
06ed9ffd
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
06ed9ffd
编写于
7月 24, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
7月 24, 2020
浏览文件
操作
浏览文件
下载
差异文件
!3183 Remove files on test fail for test_minddataset*.py
Merge pull request !3183 from tony_liu2/master
上级
60feafd7
1f4251a4
变更
5
展开全部
隐藏空白更改
内联
并排
Showing
5 changed file
with
1313 addition
and
1090 deletion
+1313
-1090
tests/ut/python/dataset/test_minddataset.py
tests/ut/python/dataset/test_minddataset.py
+1069
-963
tests/ut/python/dataset/test_minddataset_exception.py
tests/ut/python/dataset/test_minddataset_exception.py
+69
-17
tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py
...thon/dataset/test_minddataset_multi_images_and_ndarray.py
+59
-49
tests/ut/python/dataset/test_minddataset_padded.py
tests/ut/python/dataset/test_minddataset_padded.py
+71
-43
tests/ut/python/dataset/test_minddataset_sampler.py
tests/ut/python/dataset/test_minddataset_sampler.py
+45
-18
未找到文件。
tests/ut/python/dataset/test_minddataset.py
浏览文件 @
06ed9ffd
此差异已折叠。
点击以展开。
tests/ut/python/dataset/test_minddataset_exception.py
浏览文件 @
06ed9ffd
...
@@ -99,8 +99,13 @@ def test_invalid_mindrecord():
...
@@ -99,8 +99,13 @@ def test_invalid_mindrecord():
num_iter
=
0
num_iter
=
0
for
_
in
data_set
.
create_dict_iterator
():
for
_
in
data_set
.
create_dict_iterator
():
num_iter
+=
1
num_iter
+=
1
assert
num_iter
==
0
try
:
os
.
remove
(
'dummy.mindrecord'
)
assert
num_iter
==
0
except
Exception
as
error
:
os
.
remove
(
'dummy.mindrecord'
)
raise
error
else
:
os
.
remove
(
'dummy.mindrecord'
)
def
test_minddataset_lack_db
():
def
test_minddataset_lack_db
():
...
@@ -113,8 +118,13 @@ def test_minddataset_lack_db():
...
@@ -113,8 +118,13 @@ def test_minddataset_lack_db():
num_iter
=
0
num_iter
=
0
for
_
in
data_set
.
create_dict_iterator
():
for
_
in
data_set
.
create_dict_iterator
():
num_iter
+=
1
num_iter
+=
1
assert
num_iter
==
0
try
:
os
.
remove
(
CV_FILE_NAME
)
assert
num_iter
==
0
except
Exception
as
error
:
os
.
remove
(
CV_FILE_NAME
)
raise
error
else
:
os
.
remove
(
CV_FILE_NAME
)
def
test_cv_minddataset_pk_sample_error_class_column
():
def
test_cv_minddataset_pk_sample_error_class_column
():
...
@@ -189,10 +199,16 @@ def test_minddataset_invalidate_num_shards():
...
@@ -189,10 +199,16 @@ def test_minddataset_invalidate_num_shards():
num_iter
=
0
num_iter
=
0
for
_
in
data_set
.
create_dict_iterator
():
for
_
in
data_set
.
create_dict_iterator
():
num_iter
+=
1
num_iter
+=
1
assert
'Input shard_id is not within the required interval of (0 to 0).'
in
str
(
error_info
.
value
)
try
:
assert
'Input shard_id is not within the required interval of (0 to 0).'
in
str
(
error_info
.
value
)
except
Exception
as
error
:
os
.
remove
(
CV_FILE_NAME
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
raise
error
else
:
os
.
remove
(
CV_FILE_NAME
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
os
.
remove
(
CV_FILE_NAME
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
def
test_minddataset_invalidate_shard_id
():
def
test_minddataset_invalidate_shard_id
():
create_cv_mindrecord
(
1
)
create_cv_mindrecord
(
1
)
...
@@ -203,9 +219,15 @@ def test_minddataset_invalidate_shard_id():
...
@@ -203,9 +219,15 @@ def test_minddataset_invalidate_shard_id():
num_iter
=
0
num_iter
=
0
for
_
in
data_set
.
create_dict_iterator
():
for
_
in
data_set
.
create_dict_iterator
():
num_iter
+=
1
num_iter
+=
1
assert
'Input shard_id is not within the required interval of (0 to 0).'
in
str
(
error_info
.
value
)
try
:
os
.
remove
(
CV_FILE_NAME
)
assert
'Input shard_id is not within the required interval of (0 to 0).'
in
str
(
error_info
.
value
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
except
Exception
as
error
:
os
.
remove
(
CV_FILE_NAME
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
raise
error
else
:
os
.
remove
(
CV_FILE_NAME
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
def
test_minddataset_shard_id_bigger_than_num_shard
():
def
test_minddataset_shard_id_bigger_than_num_shard
():
...
@@ -217,17 +239,28 @@ def test_minddataset_shard_id_bigger_than_num_shard():
...
@@ -217,17 +239,28 @@ def test_minddataset_shard_id_bigger_than_num_shard():
num_iter
=
0
num_iter
=
0
for
_
in
data_set
.
create_dict_iterator
():
for
_
in
data_set
.
create_dict_iterator
():
num_iter
+=
1
num_iter
+=
1
assert
'Input shard_id is not within the required interval of (0 to 1).'
in
str
(
error_info
.
value
)
try
:
assert
'Input shard_id is not within the required interval of (0 to 1).'
in
str
(
error_info
.
value
)
except
Exception
as
error
:
os
.
remove
(
CV_FILE_NAME
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
raise
error
with
pytest
.
raises
(
Exception
)
as
error_info
:
with
pytest
.
raises
(
Exception
)
as
error_info
:
data_set
=
ds
.
MindDataset
(
CV_FILE_NAME
,
columns_list
,
num_readers
,
True
,
2
,
5
)
data_set
=
ds
.
MindDataset
(
CV_FILE_NAME
,
columns_list
,
num_readers
,
True
,
2
,
5
)
num_iter
=
0
num_iter
=
0
for
_
in
data_set
.
create_dict_iterator
():
for
_
in
data_set
.
create_dict_iterator
():
num_iter
+=
1
num_iter
+=
1
assert
'Input shard_id is not within the required interval of (0 to 1).'
in
str
(
error_info
.
value
)
try
:
assert
'Input shard_id is not within the required interval of (0 to 1).'
in
str
(
error_info
.
value
)
except
Exception
as
error
:
os
.
remove
(
CV_FILE_NAME
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
raise
error
else
:
os
.
remove
(
CV_FILE_NAME
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
os
.
remove
(
CV_FILE_NAME
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
def
test_cv_minddataset_partition_num_samples_equals_0
():
def
test_cv_minddataset_partition_num_samples_equals_0
():
"""tutorial for cv minddataset."""
"""tutorial for cv minddataset."""
...
@@ -245,7 +278,26 @@ def test_cv_minddataset_partition_num_samples_equals_0():
...
@@ -245,7 +278,26 @@ def test_cv_minddataset_partition_num_samples_equals_0():
num_iter
+=
1
num_iter
+=
1
with
pytest
.
raises
(
Exception
)
as
error_info
:
with
pytest
.
raises
(
Exception
)
as
error_info
:
partitions
(
5
)
partitions
(
5
)
assert
'num_samples should be a positive integer value, but got num_samples=0'
in
str
(
error_info
.
value
)
try
:
assert
'num_samples should be a positive integer value, but got num_samples=0'
in
str
(
error_info
.
value
)
except
Exception
as
error
:
os
.
remove
(
CV_FILE_NAME
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
raise
error
else
:
os
.
remove
(
CV_FILE_NAME
)
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
os
.
remove
(
CV_FILE_NAME
)
if
__name__
==
'__main__'
:
os
.
remove
(
"{}.db"
.
format
(
CV_FILE_NAME
))
test_cv_lack_json
()
test_cv_lack_mindrecord
()
test_invalid_mindrecord
()
test_minddataset_lack_db
()
test_cv_minddataset_pk_sample_error_class_column
()
test_cv_minddataset_pk_sample_exclusive_shuffle
()
test_cv_minddataset_reader_different_schema
()
test_cv_minddataset_reader_different_page_size
()
test_minddataset_invalidate_num_shards
()
test_minddataset_invalidate_shard_id
()
test_minddataset_shard_id_bigger_than_num_shard
()
test_cv_minddataset_partition_num_samples_equals_0
()
tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py
浏览文件 @
06ed9ffd
...
@@ -27,54 +27,64 @@ CV_FILE_NAME = "./complex.mindrecord"
...
@@ -27,54 +27,64 @@ CV_FILE_NAME = "./complex.mindrecord"
def
test_cv_minddataset_reader_multi_image_and_ndarray_tutorial
():
def
test_cv_minddataset_reader_multi_image_and_ndarray_tutorial
():
writer
=
FileWriter
(
CV_FILE_NAME
,
FILES_NUM
)
try
:
cv_schema_json
=
{
"id"
:
{
"type"
:
"int32"
},
writer
=
FileWriter
(
CV_FILE_NAME
,
FILES_NUM
)
"image_0"
:
{
"type"
:
"bytes"
},
cv_schema_json
=
{
"id"
:
{
"type"
:
"int32"
},
"image_2"
:
{
"type"
:
"bytes"
},
"image_0"
:
{
"type"
:
"bytes"
},
"image_3"
:
{
"type"
:
"bytes"
},
"image_2"
:
{
"type"
:
"bytes"
},
"image_4"
:
{
"type"
:
"bytes"
},
"image_3"
:
{
"type"
:
"bytes"
},
"input_mask"
:
{
"type"
:
"int32"
,
"shape"
:
[
-
1
]},
"image_4"
:
{
"type"
:
"bytes"
},
"segments"
:
{
"type"
:
"float32"
,
"shape"
:
[
2
,
3
]}}
"input_mask"
:
{
"type"
:
"int32"
,
"shape"
:
[
-
1
]},
writer
.
add_schema
(
cv_schema_json
,
"two_images_schema"
)
"segments"
:
{
"type"
:
"float32"
,
"shape"
:
[
2
,
3
]}}
with
open
(
"../data/mindrecord/testImageNetData/images/image_00010.jpg"
,
"rb"
)
as
file_reader
:
writer
.
add_schema
(
cv_schema_json
,
"two_images_schema"
)
img_data
=
file_reader
.
read
()
with
open
(
"../data/mindrecord/testImageNetData/images/image_00010.jpg"
,
"rb"
)
as
file_reader
:
ndarray_1
=
np
.
array
([
1
,
2
,
3
,
4
,
5
],
np
.
int32
)
img_data
=
file_reader
.
read
()
ndarray_2
=
np
.
array
(([
2
,
3
,
1
],
[
7
,
9
,
0
]),
np
.
float32
)
ndarray_1
=
np
.
array
([
1
,
2
,
3
,
4
,
5
],
np
.
int32
)
data
=
[]
ndarray_2
=
np
.
array
(([
2
,
3
,
1
],
[
7
,
9
,
0
]),
np
.
float32
)
for
i
in
range
(
5
):
data
=
[]
item
=
{
"id"
:
i
,
"image_0"
:
img_data
,
"image_2"
:
img_data
,
"image_3"
:
img_data
,
"image_4"
:
img_data
,
for
i
in
range
(
5
):
"input_mask"
:
ndarray_1
,
"segments"
:
ndarray_2
}
item
=
{
"id"
:
i
,
"image_0"
:
img_data
,
"image_2"
:
img_data
,
"image_3"
:
img_data
,
"image_4"
:
img_data
,
data
.
append
(
item
)
"input_mask"
:
ndarray_1
,
"segments"
:
ndarray_2
}
writer
.
write_raw_data
(
data
)
data
.
append
(
item
)
writer
.
commit
()
writer
.
write_raw_data
(
data
)
assert
os
.
path
.
exists
(
CV_FILE_NAME
)
writer
.
commit
()
assert
os
.
path
.
exists
(
CV_FILE_NAME
+
".db"
)
assert
os
.
path
.
exists
(
CV_FILE_NAME
)
assert
os
.
path
.
exists
(
CV_FILE_NAME
+
".db"
)
# tutorial for minderdataset.
# tutorial for minderdataset.
columns_list
=
[
"id"
,
"image_0"
,
"image_2"
,
"image_3"
,
"image_4"
,
"input_mask"
,
"segments"
]
columns_list
=
[
"id"
,
"image_0"
,
"image_2"
,
"image_3"
,
"image_4"
,
"input_mask"
,
"segments"
]
num_readers
=
1
num_readers
=
1
data_set
=
ds
.
MindDataset
(
CV_FILE_NAME
,
columns_list
,
num_readers
)
data_set
=
ds
.
MindDataset
(
CV_FILE_NAME
,
columns_list
,
num_readers
)
assert
data_set
.
get_dataset_size
()
==
5
assert
data_set
.
get_dataset_size
()
==
5
num_iter
=
0
num_iter
=
0
for
item
in
data_set
.
create_dict_iterator
():
for
item
in
data_set
.
create_dict_iterator
():
assert
len
(
item
)
==
7
assert
len
(
item
)
==
7
logger
.
info
(
"item: {}"
.
format
(
item
))
logger
.
info
(
"item: {}"
.
format
(
item
))
assert
item
[
"image_0"
].
dtype
==
np
.
uint8
assert
item
[
"image_0"
].
dtype
==
np
.
uint8
assert
(
item
[
"image_0"
]
==
item
[
"image_2"
]).
all
()
assert
(
item
[
"image_0"
]
==
item
[
"image_2"
]).
all
()
assert
(
item
[
"image_3"
]
==
item
[
"image_4"
]).
all
()
assert
(
item
[
"image_3"
]
==
item
[
"image_4"
]).
all
()
assert
(
item
[
"image_0"
]
==
item
[
"image_4"
]).
all
()
assert
(
item
[
"image_0"
]
==
item
[
"image_4"
]).
all
()
assert
item
[
"image_2"
].
dtype
==
np
.
uint8
assert
item
[
"image_2"
].
dtype
==
np
.
uint8
assert
item
[
"image_3"
].
dtype
==
np
.
uint8
assert
item
[
"image_3"
].
dtype
==
np
.
uint8
assert
item
[
"image_4"
].
dtype
==
np
.
uint8
assert
item
[
"image_4"
].
dtype
==
np
.
uint8
assert
item
[
"id"
].
dtype
==
np
.
int32
assert
item
[
"id"
].
dtype
==
np
.
int32
assert
item
[
"input_mask"
].
shape
==
(
5
,)
assert
item
[
"input_mask"
].
shape
==
(
5
,)
assert
item
[
"input_mask"
].
dtype
==
np
.
int32
assert
item
[
"input_mask"
].
dtype
==
np
.
int32
assert
item
[
"segments"
].
shape
==
(
2
,
3
)
assert
item
[
"segments"
].
shape
==
(
2
,
3
)
assert
item
[
"segments"
].
dtype
==
np
.
float32
assert
item
[
"segments"
].
dtype
==
np
.
float32
num_iter
+=
1
num_iter
+=
1
assert
num_iter
==
5
assert
num_iter
==
5
except
Exception
as
error
:
if
os
.
path
.
exists
(
"{}"
.
format
(
CV_FILE_NAME
+
".db"
)):
os
.
remove
(
CV_FILE_NAME
+
".db"
)
if
os
.
path
.
exists
(
"{}"
.
format
(
CV_FILE_NAME
)):
os
.
remove
(
CV_FILE_NAME
)
raise
error
else
:
if
os
.
path
.
exists
(
"{}"
.
format
(
CV_FILE_NAME
+
".db"
)):
os
.
remove
(
CV_FILE_NAME
+
".db"
)
if
os
.
path
.
exists
(
"{}"
.
format
(
CV_FILE_NAME
)):
os
.
remove
(
CV_FILE_NAME
)
if
os
.
path
.
exists
(
"{}"
.
format
(
CV_FILE_NAME
+
".db"
)):
if
__name__
==
'__main__'
:
os
.
remove
(
CV_FILE_NAME
+
".db"
)
test_cv_minddataset_reader_multi_image_and_ndarray_tutorial
()
if
os
.
path
.
exists
(
"{}"
.
format
(
CV_FILE_NAME
)):
os
.
remove
(
CV_FILE_NAME
)
tests/ut/python/dataset/test_minddataset_padded.py
浏览文件 @
06ed9ffd
...
@@ -44,24 +44,31 @@ def add_and_remove_cv_file():
...
@@ -44,24 +44,31 @@ def add_and_remove_cv_file():
"""add/remove cv file"""
"""add/remove cv file"""
paths
=
[
"{}{}"
.
format
(
CV_FILE_NAME
,
str
(
x
).
rjust
(
1
,
'0'
))
paths
=
[
"{}{}"
.
format
(
CV_FILE_NAME
,
str
(
x
).
rjust
(
1
,
'0'
))
for
x
in
range
(
FILES_NUM
)]
for
x
in
range
(
FILES_NUM
)]
for
x
in
paths
:
try
:
os
.
remove
(
"{}"
.
format
(
x
))
if
os
.
path
.
exists
(
"{}"
.
format
(
x
))
else
None
for
x
in
paths
:
os
.
remove
(
"{}.db"
.
format
(
x
))
if
os
.
path
.
exists
(
os
.
remove
(
"{}"
.
format
(
x
))
if
os
.
path
.
exists
(
"{}"
.
format
(
x
))
else
None
"{}.db"
.
format
(
x
))
else
None
os
.
remove
(
"{}.db"
.
format
(
x
))
if
os
.
path
.
exists
(
writer
=
FileWriter
(
CV_FILE_NAME
,
FILES_NUM
)
"{}.db"
.
format
(
x
))
else
None
data
=
get_data
(
CV_DIR_NAME
)
writer
=
FileWriter
(
CV_FILE_NAME
,
FILES_NUM
)
cv_schema_json
=
{
"id"
:
{
"type"
:
"int32"
},
data
=
get_data
(
CV_DIR_NAME
)
"file_name"
:
{
"type"
:
"string"
},
cv_schema_json
=
{
"id"
:
{
"type"
:
"int32"
},
"label"
:
{
"type"
:
"int32"
},
"file_name"
:
{
"type"
:
"string"
},
"data"
:
{
"type"
:
"bytes"
}}
"label"
:
{
"type"
:
"int32"
},
writer
.
add_schema
(
cv_schema_json
,
"img_schema"
)
"data"
:
{
"type"
:
"bytes"
}}
writer
.
add_index
([
"file_name"
,
"label"
])
writer
.
add_schema
(
cv_schema_json
,
"img_schema"
)
writer
.
write_raw_data
(
data
)
writer
.
add_index
([
"file_name"
,
"label"
])
writer
.
commit
()
writer
.
write_raw_data
(
data
)
yield
"yield_cv_data"
writer
.
commit
()
for
x
in
paths
:
yield
"yield_cv_data"
os
.
remove
(
"{}"
.
format
(
x
))
except
Exception
as
error
:
os
.
remove
(
"{}.db"
.
format
(
x
))
for
x
in
paths
:
os
.
remove
(
"{}"
.
format
(
x
))
os
.
remove
(
"{}.db"
.
format
(
x
))
raise
error
else
:
for
x
in
paths
:
os
.
remove
(
"{}"
.
format
(
x
))
os
.
remove
(
"{}.db"
.
format
(
x
))
@
pytest
.
fixture
@
pytest
.
fixture
...
@@ -69,32 +76,39 @@ def add_and_remove_nlp_file():
...
@@ -69,32 +76,39 @@ def add_and_remove_nlp_file():
"""add/remove nlp file"""
"""add/remove nlp file"""
paths
=
[
"{}{}"
.
format
(
NLP_FILE_NAME
,
str
(
x
).
rjust
(
1
,
'0'
))
paths
=
[
"{}{}"
.
format
(
NLP_FILE_NAME
,
str
(
x
).
rjust
(
1
,
'0'
))
for
x
in
range
(
FILES_NUM
)]
for
x
in
range
(
FILES_NUM
)]
for
x
in
paths
:
try
:
if
os
.
path
.
exists
(
"{}"
.
format
(
x
)):
for
x
in
paths
:
if
os
.
path
.
exists
(
"{}"
.
format
(
x
)):
os
.
remove
(
"{}"
.
format
(
x
))
if
os
.
path
.
exists
(
"{}.db"
.
format
(
x
)):
os
.
remove
(
"{}.db"
.
format
(
x
))
writer
=
FileWriter
(
NLP_FILE_NAME
,
FILES_NUM
)
data
=
[
x
for
x
in
get_nlp_data
(
NLP_FILE_POS
,
NLP_FILE_VOCAB
,
10
)]
nlp_schema_json
=
{
"id"
:
{
"type"
:
"string"
},
"label"
:
{
"type"
:
"int32"
},
"rating"
:
{
"type"
:
"float32"
},
"input_ids"
:
{
"type"
:
"int64"
,
"shape"
:
[
-
1
]},
"input_mask"
:
{
"type"
:
"int64"
,
"shape"
:
[
1
,
-
1
]},
"segment_ids"
:
{
"type"
:
"int64"
,
"shape"
:
[
2
,
-
1
]}
}
writer
.
set_header_size
(
1
<<
14
)
writer
.
set_page_size
(
1
<<
15
)
writer
.
add_schema
(
nlp_schema_json
,
"nlp_schema"
)
writer
.
add_index
([
"id"
,
"rating"
])
writer
.
write_raw_data
(
data
)
writer
.
commit
()
yield
"yield_nlp_data"
except
Exception
as
error
:
for
x
in
paths
:
os
.
remove
(
"{}"
.
format
(
x
))
os
.
remove
(
"{}.db"
.
format
(
x
))
raise
error
else
:
for
x
in
paths
:
os
.
remove
(
"{}"
.
format
(
x
))
os
.
remove
(
"{}"
.
format
(
x
))
if
os
.
path
.
exists
(
"{}.db"
.
format
(
x
)):
os
.
remove
(
"{}.db"
.
format
(
x
))
os
.
remove
(
"{}.db"
.
format
(
x
))
writer
=
FileWriter
(
NLP_FILE_NAME
,
FILES_NUM
)
data
=
[
x
for
x
in
get_nlp_data
(
NLP_FILE_POS
,
NLP_FILE_VOCAB
,
10
)]
nlp_schema_json
=
{
"id"
:
{
"type"
:
"string"
},
"label"
:
{
"type"
:
"int32"
},
"rating"
:
{
"type"
:
"float32"
},
"input_ids"
:
{
"type"
:
"int64"
,
"shape"
:
[
-
1
]},
"input_mask"
:
{
"type"
:
"int64"
,
"shape"
:
[
1
,
-
1
]},
"segment_ids"
:
{
"type"
:
"int64"
,
"shape"
:
[
2
,
-
1
]}
}
writer
.
set_header_size
(
1
<<
14
)
writer
.
set_page_size
(
1
<<
15
)
writer
.
add_schema
(
nlp_schema_json
,
"nlp_schema"
)
writer
.
add_index
([
"id"
,
"rating"
])
writer
.
write_raw_data
(
data
)
writer
.
commit
()
yield
"yield_nlp_data"
for
x
in
paths
:
os
.
remove
(
"{}"
.
format
(
x
))
os
.
remove
(
"{}.db"
.
format
(
x
))
def
test_cv_minddataset_reader_basic_padded_samples
(
add_and_remove_cv_file
):
def
test_cv_minddataset_reader_basic_padded_samples
(
add_and_remove_cv_file
):
"""tutorial for cv minderdataset."""
"""tutorial for cv minderdataset."""
...
@@ -119,7 +133,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
...
@@ -119,7 +133,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
encoding
=
'utf8'
)
encoding
=
'utf8'
)
assert
item
[
'label'
]
==
padded_sample
[
'label'
]
assert
item
[
'label'
]
==
padded_sample
[
'label'
]
assert
(
item
[
'data'
]
==
np
.
array
(
list
(
padded_sample
[
'data'
]))).
all
()
assert
(
item
[
'data'
]
==
np
.
array
(
list
(
padded_sample
[
'data'
]))).
all
()
num_iter
+=
1
num_iter
+=
1
assert
num_padded_iter
==
5
assert
num_padded_iter
==
5
assert
num_iter
==
15
assert
num_iter
==
15
...
@@ -636,3 +650,17 @@ def inputs(vectors, maxlen=50):
...
@@ -636,3 +650,17 @@ def inputs(vectors, maxlen=50):
mask
=
[
1
]
*
length
+
[
0
]
*
(
maxlen
-
length
)
mask
=
[
1
]
*
length
+
[
0
]
*
(
maxlen
-
length
)
segment
=
[
0
]
*
maxlen
segment
=
[
0
]
*
maxlen
return
input_
,
mask
,
segment
return
input_
,
mask
,
segment
if
__name__
==
'__main__'
:
test_cv_minddataset_reader_basic_padded_samples
(
add_and_remove_cv_file
)
test_cv_minddataset_partition_padded_samples
(
add_and_remove_cv_file
)
test_cv_minddataset_partition_padded_samples_multi_epoch
(
add_and_remove_cv_file
)
test_cv_minddataset_partition_padded_samples_no_dividsible
(
add_and_remove_cv_file
)
test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible
(
add_and_remove_cv_file
)
test_cv_minddataset_partition_padded_samples_no_equal_column_list
(
add_and_remove_cv_file
)
test_cv_minddataset_partition_padded_samples_no_column_list
(
add_and_remove_cv_file
)
test_cv_minddataset_partition_padded_samples_no_num_padded
(
add_and_remove_cv_file
)
test_cv_minddataset_partition_padded_samples_no_padded_samples
(
add_and_remove_cv_file
)
test_nlp_minddataset_reader_basic_padded_samples
(
add_and_remove_nlp_file
)
test_nlp_minddataset_reader_basic_padded_samples_multi_epoch
(
add_and_remove_nlp_file
)
test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_result_per_epoch
(
add_and_remove_nlp_file
)
tests/ut/python/dataset/test_minddataset_sampler.py
浏览文件 @
06ed9ffd
...
@@ -34,26 +34,32 @@ def add_and_remove_cv_file():
...
@@ -34,26 +34,32 @@ def add_and_remove_cv_file():
"""add/remove cv file"""
"""add/remove cv file"""
paths
=
[
"{}{}"
.
format
(
CV_FILE_NAME
,
str
(
x
).
rjust
(
1
,
'0'
))
paths
=
[
"{}{}"
.
format
(
CV_FILE_NAME
,
str
(
x
).
rjust
(
1
,
'0'
))
for
x
in
range
(
FILES_NUM
)]
for
x
in
range
(
FILES_NUM
)]
for
x
in
paths
:
try
:
if
os
.
path
.
exists
(
"{}"
.
format
(
x
)):
for
x
in
paths
:
if
os
.
path
.
exists
(
"{}"
.
format
(
x
)):
os
.
remove
(
"{}"
.
format
(
x
))
if
os
.
path
.
exists
(
"{}.db"
.
format
(
x
)):
os
.
remove
(
"{}.db"
.
format
(
x
))
writer
=
FileWriter
(
CV_FILE_NAME
,
FILES_NUM
)
data
=
get_data
(
CV_DIR_NAME
,
True
)
cv_schema_json
=
{
"id"
:
{
"type"
:
"int32"
},
"file_name"
:
{
"type"
:
"string"
},
"label"
:
{
"type"
:
"int32"
},
"data"
:
{
"type"
:
"bytes"
}}
writer
.
add_schema
(
cv_schema_json
,
"img_schema"
)
writer
.
add_index
([
"file_name"
,
"label"
])
writer
.
write_raw_data
(
data
)
writer
.
commit
()
yield
"yield_cv_data"
except
Exception
as
error
:
for
x
in
paths
:
os
.
remove
(
"{}"
.
format
(
x
))
os
.
remove
(
"{}.db"
.
format
(
x
))
raise
error
else
:
for
x
in
paths
:
os
.
remove
(
"{}"
.
format
(
x
))
os
.
remove
(
"{}"
.
format
(
x
))
if
os
.
path
.
exists
(
"{}.db"
.
format
(
x
)):
os
.
remove
(
"{}.db"
.
format
(
x
))
os
.
remove
(
"{}.db"
.
format
(
x
))
writer
=
FileWriter
(
CV_FILE_NAME
,
FILES_NUM
)
data
=
get_data
(
CV_DIR_NAME
,
True
)
cv_schema_json
=
{
"id"
:
{
"type"
:
"int32"
},
"file_name"
:
{
"type"
:
"string"
},
"label"
:
{
"type"
:
"int32"
},
"data"
:
{
"type"
:
"bytes"
}}
writer
.
add_schema
(
cv_schema_json
,
"img_schema"
)
writer
.
add_index
([
"file_name"
,
"label"
])
writer
.
write_raw_data
(
data
)
writer
.
commit
()
yield
"yield_cv_data"
for
x
in
paths
:
os
.
remove
(
"{}"
.
format
(
x
))
os
.
remove
(
"{}.db"
.
format
(
x
))
def
test_cv_minddataset_pk_sample_no_column
(
add_and_remove_cv_file
):
def
test_cv_minddataset_pk_sample_no_column
(
add_and_remove_cv_file
):
"""tutorial for cv minderdataset."""
"""tutorial for cv minderdataset."""
...
@@ -626,3 +632,24 @@ def get_data(dir_name, sampler=False):
...
@@ -626,3 +632,24 @@ def get_data(dir_name, sampler=False):
except
FileNotFoundError
:
except
FileNotFoundError
:
continue
continue
return
data_list
return
data_list
if
__name__
==
'__main__'
:
test_cv_minddataset_pk_sample_no_column
(
add_and_remove_cv_file
)
test_cv_minddataset_pk_sample_basic
(
add_and_remove_cv_file
)
test_cv_minddataset_pk_sample_shuffle
(
add_and_remove_cv_file
)
test_cv_minddataset_pk_sample_out_of_range
(
add_and_remove_cv_file
)
test_cv_minddataset_subset_random_sample_basic
(
add_and_remove_cv_file
)
test_cv_minddataset_subset_random_sample_replica
(
add_and_remove_cv_file
)
test_cv_minddataset_subset_random_sample_empty
(
add_and_remove_cv_file
)
test_cv_minddataset_subset_random_sample_out_of_range
(
add_and_remove_cv_file
)
test_cv_minddataset_subset_random_sample_negative
(
add_and_remove_cv_file
)
test_cv_minddataset_random_sampler_basic
(
add_and_remove_cv_file
)
test_cv_minddataset_random_sampler_repeat
(
add_and_remove_cv_file
)
test_cv_minddataset_random_sampler_replacement
(
add_and_remove_cv_file
)
test_cv_minddataset_sequential_sampler_basic
(
add_and_remove_cv_file
)
test_cv_minddataset_sequential_sampler_exceed_size
(
add_and_remove_cv_file
)
test_cv_minddataset_split_basic
(
add_and_remove_cv_file
)
test_cv_minddataset_split_exact_percent
(
add_and_remove_cv_file
)
test_cv_minddataset_split_fuzzy_percent
(
add_and_remove_cv_file
)
test_cv_minddataset_split_deterministic
(
add_and_remove_cv_file
)
test_cv_minddataset_split_sharding
(
add_and_remove_cv_file
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录