Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
ea297c08
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ea297c08
编写于
4月 17, 2020
作者:
A
anthonyaje
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix dataset serdes for MindDataset
上级
c0c0b098
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
60 additions
and
24 deletions
+60
-24
mindspore/dataset/engine/serializer_deserializer.py
mindspore/dataset/engine/serializer_deserializer.py
+27
-22
tests/ut/python/dataset/test_serdes_dataset.py
tests/ut/python/dataset/test_serdes_dataset.py
+33
-2
未找到文件。
mindspore/dataset/engine/serializer_deserializer.py
浏览文件 @
ea297c08
...
@@ -127,9 +127,12 @@ def serialize_operations(node_repr, key, val):
...
@@ -127,9 +127,12 @@ def serialize_operations(node_repr, key, val):
def serialize_sampler(node_repr, val):
    """Serialize sampler object to dictionary.

    Args:
        node_repr (dict): Serialized representation of the dataset node;
            its 'sampler' entry is overwritten in place.
        val: Sampler instance to serialize, or None when the node has no
            sampler.

    The stored entry is None when `val` is None. Otherwise it is a dict
    holding the sampler's instance attributes plus two bookkeeping keys,
    'sampler_module' and 'sampler_name', taken from the sampler's type.
    """
    if val is None:
        node_repr['sampler'] = None
        return

    # Work on a shallow copy: writing the bookkeeping keys straight into
    # val.__dict__ would add 'sampler_module' / 'sampler_name' attributes
    # to the live sampler object as a side effect of serialization.
    sampler_repr = dict(val.__dict__)
    sampler_repr['sampler_module'] = type(val).__module__
    sampler_repr['sampler_name'] = type(val).__name__
    node_repr['sampler'] = sampler_repr
def
traverse
(
node
):
def
traverse
(
node
):
...
@@ -253,9 +256,10 @@ def create_node(node):
...
@@ -253,9 +256,10 @@ def create_node(node):
node
.
get
(
'shuffle'
),
sampler
,
node
.
get
(
'num_shards'
),
node
.
get
(
'shard_id'
))
node
.
get
(
'shuffle'
),
sampler
,
node
.
get
(
'num_shards'
),
node
.
get
(
'shard_id'
))
elif
dataset_op
==
'MindDataset'
:
elif
dataset_op
==
'MindDataset'
:
pyobj
=
pyclass
(
node
[
'dataset_file'
],
node
.
get
(
'column_list'
),
sampler
=
construct_sampler
(
node
.
get
(
'sampler'
))
pyobj
=
pyclass
(
node
[
'dataset_file'
],
node
.
get
(
'columns_list'
),
node
.
get
(
'num_parallel_workers'
),
node
.
get
(
'seed'
),
node
.
get
(
'num_shards'
),
node
.
get
(
'num_parallel_workers'
),
node
.
get
(
'seed'
),
node
.
get
(
'num_shards'
),
node
.
get
(
'shard_id'
),
node
.
get
(
'block_reader'
))
node
.
get
(
'shard_id'
),
node
.
get
(
'block_reader'
)
,
sampler
)
elif
dataset_op
==
'TFRecordDataset'
:
elif
dataset_op
==
'TFRecordDataset'
:
pyobj
=
pyclass
(
node
[
'dataset_files'
],
node
.
get
(
'schema'
),
node
.
get
(
'column_list'
),
pyobj
=
pyclass
(
node
[
'dataset_files'
],
node
.
get
(
'schema'
),
node
.
get
(
'column_list'
),
...
@@ -341,24 +345,25 @@ def create_node(node):
...
@@ -341,24 +345,25 @@ def create_node(node):
def construct_sampler(in_sampler):
    """Instantiate Sampler object based on the information from dictionary['sampler']

    Args:
        in_sampler (dict or None): Serialized sampler. Must contain
            'sampler_name' and 'sampler_module' plus the constructor
            arguments read below for the named sampler type. None means
            the node was serialized without a sampler.

    Returns:
        The reconstructed sampler instance, or None when `in_sampler` is None.

    Raises:
        ValueError: If 'sampler_name' is not one of the handled sampler types.
        KeyError: If 'sampler_module' is not present in sys.modules or a
            required entry is missing from `in_sampler`.
        AttributeError: If the module has no attribute named 'sampler_name'.
    """
    sampler = None
    if in_sampler is not None:
        sampler_name = in_sampler['sampler_name']
        sampler_module = in_sampler['sampler_module']
        # The class is looked up in an already-imported module; this does
        # not import the module itself.
        sampler_class = getattr(sys.modules[sampler_module], sampler_name)
        if sampler_name == 'DistributedSampler':
            sampler = sampler_class(in_sampler['num_shards'], in_sampler['shard_id'],
                                    in_sampler.get('shuffle'))
        elif sampler_name == 'PKSampler':
            # Read 'shuffle' with .get(): calling the dict itself
            # (in_sampler('shuffle')) raises TypeError.
            sampler = sampler_class(in_sampler['num_val'], in_sampler.get('num_class'),
                                    in_sampler.get('shuffle'))
        elif sampler_name == 'RandomSampler':
            sampler = sampler_class(in_sampler.get('replacement'), in_sampler.get('num_samples'))
        elif sampler_name == 'SequentialSampler':
            sampler = sampler_class()
        elif sampler_name == 'SubsetRandomSampler':
            sampler = sampler_class(in_sampler['indices'])
        elif sampler_name == 'WeightedRandomSampler':
            sampler = sampler_class(in_sampler['weights'], in_sampler['num_samples'],
                                    in_sampler.get('replacement'))
        else:
            raise ValueError("Sampler type is unknown: " + sampler_name)

    return sampler
...
...
tests/ut/python/dataset/test_serdes_dataset.py
浏览文件 @
ea297c08
...
@@ -19,7 +19,7 @@ import filecmp
...
@@ -19,7 +19,7 @@ import filecmp
import
glob
import
glob
import
json
import
json
import
os
import
os
import
pytest
import
numpy
as
np
import
numpy
as
np
import
mindspore.dataset
as
ds
import
mindspore.dataset
as
ds
...
@@ -28,7 +28,6 @@ import mindspore.dataset.transforms.vision.c_transforms as vision
...
@@ -28,7 +28,6 @@ import mindspore.dataset.transforms.vision.c_transforms as vision
from
mindspore.dataset.transforms.vision
import
Inter
from
mindspore.dataset.transforms.vision
import
Inter
from
mindspore
import
log
as
logger
from
mindspore
import
log
as
logger
def
test_imagefolder
(
remove_json_files
=
True
):
def
test_imagefolder
(
remove_json_files
=
True
):
"""
"""
Test simulating resnet50 dataset pipeline.
Test simulating resnet50 dataset pipeline.
...
@@ -217,6 +216,38 @@ def delete_json_files():
...
@@ -217,6 +216,38 @@ def delete_json_files():
except
IOError
:
except
IOError
:
logger
.
info
(
"Error while deleting: {}"
.
format
(
f
))
logger
.
info
(
"Error while deleting: {}"
.
format
(
f
))
# Test save load minddataset
from
test_minddataset_sampler
import
add_and_remove_cv_file
,
get_data
,
CV_DIR_NAME
,
CV_FILE_NAME
,
FILES_NUM
,
\
FileWriter
,
Inter
def test_minddataset(add_and_remove_cv_file):
    """Round-trip a MindDataset pipeline through serialize/deserialize.

    Builds a MindDataset that uses a SubsetRandomSampler, serializes it,
    rebuilds the pipeline from the serialized dictionary, and checks that
    serializing the rebuilt pipeline gives the same JSON. It then checks
    the reported dataset size and the number of rows actually iterated.

    Args:
        add_and_remove_cv_file: fixture imported from
            test_minddataset_sampler; presumably creates and cleans up the
            CV mindrecord files -- confirm against that module.
    """
    subset_sampler = ds.SubsetRandomSampler([1, 2, 3, 5, 7])
    original = ds.MindDataset(CV_FILE_NAME + "0", ["data", "file_name", "label"], 4,
                              sampler=subset_sampler)

    # Serialize to a python dictionary, then to a canonical JSON string
    ds1_dict = ds.serialize(original)
    ds1_json = json.dumps(ds1_dict, sort_keys=True)

    # Rebuild the pipeline from its serialized form and serialize it again
    data_set = ds.deserialize(input_dict=ds1_dict)
    ds2_json = json.dumps(ds.serialize(data_set), sort_keys=True)

    assert ds1_json == ds2_json

    # Result is unused; the call is kept so the test runs the same steps
    get_data(CV_DIR_NAME)
    assert data_set.get_dataset_size() == 10
    # Count the rows produced by the rebuilt pipeline
    row_count = sum(1 for _ in data_set.create_dict_iterator())
    assert row_count == 5
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录