Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
正统之独孤求败
mindspore
提交
ab37e87d
M
mindspore
项目概览
正统之独孤求败
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ab37e87d
编写于
7月 15, 2020
作者:
T
tinazhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
adding Mnist python ut coverage
上级
863f4e4f
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
246 addition
and
1 deletion
+246
-1
tests/ut/python/dataset/test_datasets_cifarop.py
tests/ut/python/dataset/test_datasets_cifarop.py
+7
-0
tests/ut/python/dataset/test_datasets_mnist.py
tests/ut/python/dataset/test_datasets_mnist.py
+238
-0
tests/ut/python/dataset/test_datasets_sharding.py
tests/ut/python/dataset/test_datasets_sharding.py
+1
-1
未找到文件。
tests/ut/python/dataset/test_datasets_cifarop.py
浏览文件 @
ab37e87d
...
...
@@ -87,6 +87,13 @@ def test_cifar10_basic():
"""
logger
.
info
(
"Test Cifar10Dataset Op"
)
# case 0: test loading the whole dataset
data0
=
ds
.
Cifar10Dataset
(
DATA_DIR_10
)
num_iter0
=
0
for
_
in
data0
.
create_dict_iterator
():
num_iter0
+=
1
assert
num_iter0
==
10000
# case 1: test num_samples
data1
=
ds
.
Cifar10Dataset
(
DATA_DIR_10
,
num_samples
=
100
)
num_iter1
=
0
...
...
tests/ut/python/dataset/test_datasets_mnist.py
0 → 100644
浏览文件 @
ab37e87d
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Test Mnist dataset operators
"""
import
os
import
pytest
import
numpy
as
np
import
matplotlib.pyplot
as
plt
import
mindspore.dataset
as
ds
from
mindspore
import
log
as
logger
# Directory holding the MNIST test files; it is passed both to ds.MnistDataset
# and to load_mnist() below (relative path, so it depends on the working directory).
DATA_DIR = "../data/dataset/testMnistData"
def _read_ubyte_payload(file_path, header_bytes):
    """
    Return the uint8 content of file_path that follows the first header_bytes bytes
    """
    with open(file_path, 'rb') as stream:
        # Position the handle just past the header; np.fromfile reads from there to EOF.
        stream.seek(header_bytes)
        return np.fromfile(stream, dtype=np.uint8)


def load_mnist(path):
    """
    Read the raw Mnist test images and labels found under path

    Args:
        path: directory containing 't10k-labels-idx1-ubyte' and 't10k-images-idx3-ubyte'.

    Returns:
        Tuple (images, labels): images is a uint8 array reshaped to (-1, 28, 28, 1)
        whose non-zero pixels are all set to 255; labels is a flat uint8 array.
    """
    # The label file carries 8 leading bytes that are skipped, the image file 16.
    labels = _read_ubyte_payload(os.path.join(path, 't10k-labels-idx1-ubyte'), 8)
    pixels = _read_ubyte_payload(os.path.join(path, 't10k-images-idx3-ubyte'), 16)

    images = pixels.reshape(-1, 28, 28, 1)
    # Perform binarization to maintain consistency with our API
    lit_pixels = images > 0
    images[lit_pixels] = 255
    return images, labels
def visualize_dataset(images, labels):
    """
    Plot every sample side by side in a single row, titled with its label

    Args:
        images: sequence of image arrays; each one is squeezed before being drawn.
        labels: sequence of titles, indexed in step with images.
    """
    total = len(images)
    for column, picture in enumerate(images, start=1):
        # One subplot per sample: 1 row, `total` columns, 1-based slot index.
        plt.subplot(1, total, column)
        plt.imshow(picture.squeeze(), cmap=plt.cm.gray)
        plt.title(labels[column - 1])
    plt.show()
def test_mnist_content_check():
    """
    Validate MnistDataset image readings

    Reads the first 100 unshuffled samples through MnistDataset and compares each
    image and label with the arrays that load_mnist() parses from the same directory.
    """
    logger.info("Test MnistDataset Op with content check")
    data1 = ds.MnistDataset(DATA_DIR, num_samples=100, shuffle=False)
    images, labels = load_mnist(DATA_DIR)
    num_iter = 0
    # in this example, each dictionary has keys "image" and "label"
    # (the image/label lists that used to be accumulated here were never read,
    # so they have been dropped)
    for i, data in enumerate(data1.create_dict_iterator()):
        np.testing.assert_array_equal(data["image"], images[i])
        np.testing.assert_array_equal(data["label"], labels[i])
        num_iter += 1
    assert num_iter == 100
def test_mnist_basic():
    """
    Exercise MnistDataset loading, num_samples, repeat and batch row counts
    """
    logger.info("Test MnistDataset Op")

    def count_rows(dataset):
        # Drain a fresh dict iterator and report how many rows it produced.
        return sum(1 for _ in dataset.create_dict_iterator())

    # case 1: test loading whole dataset
    whole = ds.MnistDataset(DATA_DIR)
    assert count_rows(whole) == 10000

    # case 2: test num_samples
    limited = ds.MnistDataset(DATA_DIR, num_samples=500)
    assert count_rows(limited) == 500

    # case 3: test repeat
    repeated = ds.MnistDataset(DATA_DIR, num_samples=200).repeat(5)
    assert count_rows(repeated) == 1000

    # case 4: test batch with drop_remainder=False
    keep_tail = ds.MnistDataset(DATA_DIR, num_samples=100)
    assert keep_tail.get_dataset_size() == 100
    assert keep_tail.get_batch_size() == 1
    # drop_remainder is default to be False
    keep_tail = keep_tail.batch(batch_size=7)
    assert keep_tail.get_dataset_size() == 15
    assert keep_tail.get_batch_size() == 7
    assert count_rows(keep_tail) == 15

    # case 5: test batch with drop_remainder=True
    drop_tail = ds.MnistDataset(DATA_DIR, num_samples=100)
    assert drop_tail.get_dataset_size() == 100
    assert drop_tail.get_batch_size() == 1
    # the rest of incomplete batch will be dropped
    drop_tail = drop_tail.batch(batch_size=7, drop_remainder=True)
    assert drop_tail.get_dataset_size() == 14
    assert drop_tail.get_batch_size() == 7
    assert count_rows(drop_tail) == 14
def test_mnist_pk_sampler():
    """
    Check the labels MnistDataset yields when driven by PKSampler(3)
    """
    logger.info("Test MnistDataset Op with PKSampler")
    # Expected label sequence: each digit 0..9 three times in a row.
    golden = [digit for digit in range(10) for _ in range(3)]
    data = ds.MnistDataset(DATA_DIR, sampler=ds.PKSampler(3))
    label_list = [row["label"] for row in data.create_dict_iterator()]
    num_iter = len(label_list)
    np.testing.assert_array_equal(golden, label_list)
    assert num_iter == 30
def test_mnist_sequential_sampler():
    """
    Compare SequentialSampler output with an unshuffled read of the same length
    """
    logger.info("Test MnistDataset Op with SequentialSampler")
    num_samples = 50
    via_sampler = ds.MnistDataset(DATA_DIR, sampler=ds.SequentialSampler(num_samples=num_samples))
    via_flags = ds.MnistDataset(DATA_DIR, shuffle=False, num_samples=num_samples)

    # Walk both pipelines in lockstep and keep the label pairs.
    paired = [
        (left["label"], right["label"])
        for left, right in zip(via_sampler.create_dict_iterator(), via_flags.create_dict_iterator())
    ]
    label_list1 = [first for first, _ in paired]
    label_list2 = [second for _, second in paired]

    np.testing.assert_array_equal(label_list1, label_list2)
    assert len(paired) == num_samples
def test_mnist_exception():
    """
    Check that invalid MnistDataset argument combinations raise the expected errors
    """
    logger.info("Test error cases for MnistDataset")

    shard_range_msg = "Input shard_id is not within the required interval"
    workers_msg = "num_parallel_workers exceeds"

    # Each entry: (expected exception, message pattern, deferred constructor call).
    # The call is deferred so every argument is still evaluated inside pytest.raises.
    failing_cases = [
        (RuntimeError, "sampler and shuffle cannot be specified at the same time",
         lambda: ds.MnistDataset(DATA_DIR, shuffle=False, sampler=ds.PKSampler(3))),
        (RuntimeError, "sampler and sharding cannot be specified at the same time",
         lambda: ds.MnistDataset(DATA_DIR, sampler=ds.PKSampler(3), num_shards=2, shard_id=0)),
        (RuntimeError, "num_shards is specified and currently requires shard_id as well",
         lambda: ds.MnistDataset(DATA_DIR, num_shards=10)),
        (RuntimeError, "shard_id is specified but num_shards is not",
         lambda: ds.MnistDataset(DATA_DIR, shard_id=0)),
        (ValueError, shard_range_msg,
         lambda: ds.MnistDataset(DATA_DIR, num_shards=5, shard_id=-1)),
        (ValueError, shard_range_msg,
         lambda: ds.MnistDataset(DATA_DIR, num_shards=5, shard_id=5)),
        (ValueError, shard_range_msg,
         lambda: ds.MnistDataset(DATA_DIR, num_shards=2, shard_id=5)),
        (ValueError, workers_msg,
         lambda: ds.MnistDataset(DATA_DIR, shuffle=False, num_parallel_workers=0)),
        (ValueError, workers_msg,
         lambda: ds.MnistDataset(DATA_DIR, shuffle=False, num_parallel_workers=65)),
        (ValueError, workers_msg,
         lambda: ds.MnistDataset(DATA_DIR, shuffle=False, num_parallel_workers=-2)),
        (TypeError, "Argument shard_id",
         lambda: ds.MnistDataset(DATA_DIR, num_shards=2, shard_id="0")),
    ]

    for expected_error, pattern, build in failing_cases:
        with pytest.raises(expected_error, match=pattern):
            build()
def test_mnist_visualize(plot=False):
    """
    Check type, shape and dtype of the first 10 unshuffled samples; optionally plot them

    Args:
        plot: when true, the collected samples are handed to visualize_dataset().
    """
    logger.info("Test MnistDataset visualization")

    dataset = ds.MnistDataset(DATA_DIR, num_samples=10, shuffle=False)
    image_list = []
    label_list = []
    for row in dataset.create_dict_iterator():
        image, label = row["image"], row["label"]
        image_list.append(image)
        label_list.append("label {}".format(label))
        assert isinstance(image, np.ndarray)
        assert image.shape == (28, 28, 1)
        assert image.dtype == np.uint8
        assert label.dtype == np.uint32
    # One list entry was appended per row, so the length is the row count.
    assert len(image_list) == 10
    if plot:
        visualize_dataset(image_list, label_list)
if __name__ == '__main__':
    # Run the checks in their original order; the visualization goes last with plotting on.
    for check in (
            test_mnist_content_check,
            test_mnist_basic,
            test_mnist_pk_sampler,
            test_mnist_sequential_sampler,
            test_mnist_exception,
    ):
        check()
    test_mnist_visualize(plot=True)
tests/ut/python/dataset/test_datasets_sharding.py
浏览文件 @
ab37e87d
...
...
@@ -200,7 +200,7 @@ def test_cifar10_shardings(print_res=False):
logger
.
info
(
"labels of dataset: {}"
.
format
(
res
))
return
res
#
6
0000 rows in total. CIFAR reads everything in memory which would make each test case very slow
#
1
0000 rows in total. CIFAR reads everything in memory which would make each test case very slow
# therefore, only 2 test cases for now.
assert
sharding_config
(
10000
,
9999
,
7
,
False
,
1
)
==
[
9
]
assert
sharding_config
(
10000
,
0
,
4
,
False
,
3
)
==
[
0
,
0
,
0
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录