Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
c5a8ffe4
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c5a8ffe4
编写于
4月 29, 2020
作者:
J
Junhan Hu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add schema support for GeneratorDataset
上级
4eb83ffa
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
60 additions
and
8 deletions
+60
-8
mindspore/dataset/engine/datasets.py
mindspore/dataset/engine/datasets.py
+16
-5
mindspore/dataset/engine/validators.py
mindspore/dataset/engine/validators.py
+8
-3
tests/ut/python/dataset/test_generator.py
tests/ut/python/dataset/test_generator.py
+36
-0
未找到文件。
mindspore/dataset/engine/datasets.py
浏览文件 @
c5a8ffe4
...
...
@@ -2504,11 +2504,12 @@ class GeneratorDataset(SourceDataset):
Iterable source is required to return a tuple of numpy array as a row of the dataset on iter(source).next().
Random accessible source is required to return a tuple of numpy array as a row of the dataset on
source[idx].
column_names (list[str]): List of column names of the dataset.
column_names (list[str], optional): List of column names of the dataset (default=None). Users are required to
provide either column_names or schema.
column_types (list[mindspore.dtype], optional): List of column data types of the dataset (default=None).
If provided, sanity check will be performed on generator output.
schema (Schema/String, optional): Path to the json schema file or schema object (default=None).
If the schema is not provided, the meta data from column_names and column_types is considered the schema
.
schema (Schema/String, optional): Path to the json schema file or schema object (default=None).
Users are
required to provide either column_names or schema. If both are provided, schema will be used
.
num_samples (int, optional): The number of samples to be included in the dataset
(default=None, all images).
num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1).
...
...
@@ -2555,8 +2556,8 @@ class GeneratorDataset(SourceDataset):
"""
@
check_generatordataset
def
__init__
(
self
,
source
,
column_names
,
column_types
=
None
,
schema
=
None
,
num_samples
=
None
,
num_parallel_workers
=
1
,
shuffle
=
None
,
sampler
=
None
,
num_shards
=
None
,
shard_id
=
None
):
def
__init__
(
self
,
source
,
column_names
=
None
,
column_types
=
None
,
schema
=
None
,
num_samples
=
None
,
num_parallel_workers
=
1
,
shuffle
=
None
,
sampler
=
None
,
num_shards
=
None
,
shard_id
=
None
):
super
().
__init__
(
num_parallel_workers
)
self
.
sampler
=
_select_sampler
(
num_samples
,
sampler
,
shuffle
,
num_shards
,
shard_id
)
if
self
.
sampler
is
not
None
and
hasattr
(
source
,
"__getitem__"
):
...
...
@@ -2598,6 +2599,16 @@ class GeneratorDataset(SourceDataset):
else
:
self
.
column_types
=
column_types
if
schema
is
not
None
:
self
.
schema
=
schema
if
not
isinstance
(
schema
,
Schema
):
self
.
schema
=
Schema
(
schema
)
self
.
column_names
=
[]
self
.
column_types
=
[]
for
col
in
self
.
schema
.
columns
:
self
.
column_names
.
append
(
col
[
"name"
])
self
.
column_types
.
append
(
DataType
(
col
[
"type"
]))
def
get_args
(
self
):
args
=
super
().
get_args
()
args
[
"source"
]
=
self
.
source
...
...
mindspore/dataset/engine/validators.py
浏览文件 @
c5a8ffe4
...
...
@@ -555,10 +555,15 @@ def check_generatordataset(method):
except
TypeError
:
raise
TypeError
(
"source should be callable, iterable or random accessible"
)
# check column_names; required argument
# check column_names
or schema
; required argument
column_names
=
param_dict
.
get
(
'column_names'
)
if
column_names
is
None
:
raise
ValueError
(
"column_names is not provided."
)
schema
=
param_dict
.
get
(
'schema'
)
if
column_names
is
None
and
schema
is
None
:
raise
ValueError
(
"Neither columns_names not schema are provided."
)
if
schema
is
not
None
:
if
not
isinstance
(
schema
,
datasets
.
Schema
)
and
not
isinstance
(
schema
,
str
):
raise
ValueError
(
"schema should be a path to schema file or a schema object."
)
# check optional argument
nreq_param_int
=
[
"num_samples"
,
"num_parallel_workers"
,
"num_shards"
,
"shard_id"
]
...
...
tests/ut/python/dataset/test_generator.py
浏览文件 @
c5a8ffe4
...
...
@@ -580,6 +580,41 @@ def test_num_samples_underflow():
count
=
count
+
1
assert
count
==
64
def type_tester_with_type_check_2c_schema(t, c):
    """Run a 2-column GeneratorDataset built from a Schema and check column "data0".

    Args:
        t: numpy dtype produced by the generator under test.
        c: two-element sequence of column types; c[0]/c[1] become the
           schema types of columns "data0"/"data1".
    """
    logger.info(f"Test with Type {t.__name__}")
    schema = ds.Schema()
    schema.add_column("data0", c[0])
    schema.add_column("data1", c[1])
    # apply dataset operations
    dataset = ds.GeneratorDataset((lambda: generator_with_type_2c(t)), schema=schema)
    dataset = dataset.batch(4)
    for batch_idx, row in enumerate(dataset.create_dict_iterator()):
        # each row is a dictionary; every batch holds 4 consecutive values
        base = batch_idx * 4
        golden = np.array([[base], [base + 1], [base + 2], [base + 3]], dtype=t)
        assert np.array_equal(row["data0"], golden)
def test_schema():
    """
    Test 2 column Generator on different data type with type check with schema input
    """
    logger.info("Test 2 column Generator on all data types with type check")
    np_types = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16,
                np.uint32, np.uint64, np.float32, np.float64]
    de_types = [mstype.int8, mstype.int16, mstype.int32, mstype.int64, mstype.uint8,
                mstype.uint16, mstype.uint32, mstype.uint64, mstype.float32, mstype.float64]
    # Pair each numpy dtype with its mindspore counterpart directly instead of
    # indexing both parallel lists via range(len(...)).
    for np_type, de_type in zip(np_types, de_types):
        type_tester_with_type_check_2c_schema(np_type, [de_type, de_type])
def
manual_test_keyborad_interrupt
():
"""
Test keyborad_interrupt
...
...
@@ -626,5 +661,6 @@ if __name__ == "__main__":
test_sequential_sampler
()
test_distributed_sampler
()
test_random_sampler
()
test_schema
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录