magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit 6a55d7ca
Authored on Jul 28, 2020 by mindspore-ci-bot; committed by Gitee on Jul 28, 2020
!3571 fix the description of cache
Merge pull request !3571 from guansongsong/gss/fix_cache_core
Parents: bca16792, 1906ed1b
3 changed files with 52 additions and 11 deletions (+52 −11)
mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc  +3 −0
mindspore/dataset/engine/datasets.py  +14 −5
tests/ut/python/dataset/test_cache_map.py  +35 −6
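Taken together, the three diffs below make one fix: the C++ change filters more run-dependent fields out of the CRC that keys the dataset cache, the Python change corrects and expands the cache parameter docstrings, and the test change adds a regression test for the core-dump scenario while switching print calls to logger calls.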
mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc

@@ -387,6 +387,9 @@ uint32_t DatasetOp::GenerateCRC(const std::shared_ptr<DatasetOp> &op) {
   ss_str = std::regex_replace(ss_str, std::regex("Num workers.*\n"), "");
   ss_str = std::regex_replace(ss_str, std::regex("\\[workers.*\\]"), "");
+  // Filter out Number of rows when generating the check sum
+  ss_str = std::regex_replace(ss_str, std::regex("Number of rows.*\n"), "");
+  // Filter out the Operator control flags field when generating the check sum
   ss_str = std::regex_replace(ss_str, std::regex("Operator control flags.*\n"), "");
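The hunk above extends DatasetOp::GenerateCRC so that fields that vary between runs of an otherwise identical pipeline (worker counts, number of rows, operator control flags) no longer perturb the checksum that identifies a pipeline to the cache. Below is a minimal Python sketch of that idea; pipeline_crc and op_description are hypothetical names and zlib.crc32 is a stand-in for the internal CRC routine, so this illustrates the technique rather than MindSpore's actual implementation.

import re
import zlib

def pipeline_crc(op_description: str) -> int:
    """Checksum an operator description while ignoring fields that vary
    between runs of an otherwise identical pipeline."""
    # Mirrors the regexes in the C++ hunk: these fields change run to run
    # without changing pipeline semantics, so they must not affect the key.
    volatile_fields = [
        r"Num workers.*\n",
        r"\[workers.*\]",
        r"Number of rows.*\n",
        r"Operator control flags.*\n",
    ]
    for pattern in volatile_fields:
        op_description = re.sub(pattern, "", op_description)
    return zlib.crc32(op_description.encode())

Two pipelines that differ only in worker count or in sampled row count then hash to the same value, which is what lets the cache recognize them as the same pipeline instead of dumping core on a key mismatch.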
mindspore/dataset/engine/datasets.py

@@ -435,7 +435,8 @@ class Dataset:
                 parallel (default=None, the value from the config will be used).
             python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This
                 option could be beneficial if the python operation is computational heavy (default=False).
-            cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used)
+            cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
+                The cache feature is under development and is not recommended.

         Returns:
             MapDataset, dataset after mapping operation.
@@ -1951,7 +1952,9 @@ class MapDataset(DatasetOp):
             in parallel (default=None).
         python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This
             option could be beneficial if the python operation is computational heavy (default=False).
-        cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used)
+        cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
+            The cache feature is under development and is not recommended.
+
     Raises:
         ValueError: If len(input_columns) != len(output_columns) and columns_order is not specified.
@@ -2141,6 +2144,7 @@ class RepeatDataset(DatasetOp):
         """
         return self.count

+
 class SkipDataset(DatasetOp):
     """
     The result of applying Skip operator to the input Dataset.
@@ -2406,6 +2410,7 @@ class TransferDataset(DatasetOp):
     def stop_send(self):
         self.iterator.depipeline.StopSend()

+
 class RangeDataset(MappableDataset):
     """
     A source dataset that reads and parses datasets stored on disk in a range.
@@ -2552,7 +2557,8 @@ class ImageFolderDatasetV2(MappableDataset):
             into (default=None).
         shard_id (int, optional): The shard ID within num_shards (default=None). This
             argument should be specified only when num_shards is also specified.
-        cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used)
+        cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
+            The cache feature is under development and is not recommended.

     Raises:
         RuntimeError: If sampler and shuffle are specified at the same time.
@@ -3348,7 +3354,8 @@ class TFRecordDataset(SourceDataset):
             argument should be specified only when num_shards is also specified.
         shard_equal_rows (bool): Get equal rows for all shards(default=False). If shard_equal_rows is false, number
             of rows of each shard may be not equal.
-        cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used)
+        cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
+            The cache feature is under development and is not recommended.

     Examples:
         >>> import mindspore.dataset as ds
         >>> import mindspore.common.dtype as mstype
@@ -3919,7 +3926,8 @@ class RandomDataset(SourceDataset):
         num_samples (int): number of samples to draw from the total. (default=None, which means all rows)
         num_parallel_workers (int, optional): number of workers to read the data
             (default=None, number set in the config).
-        cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used)
+        cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
+            The cache feature is under development and is not recommended.
         shuffle (bool, optional): Whether or not to perform shuffle on the dataset
             (default=None, expected order behavior shown in the table).
         num_shards (int, optional): Number of shards that the dataset should be divided
@@ -5313,6 +5321,7 @@ class BuildVocabDataset(DatasetOp):
         return new_op

+
 class BuildSentencePieceVocabDataset(DatasetOp):
     """
     Build a SentencePieceVocab from a dataset.
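Every datasets.py hunk above applies the same docstring fix: the cache description gains its missing period and an explicit warning that the feature is under development. For orientation, here is a minimal usage sketch modeled on the test code in this commit; the c_vision import path reflects the API of this period, and the session ID, cache size, and dataset directory are illustrative assumptions, not prescriptions.

import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as c_vision

# Illustrative values, matching the tests below: size=0 means no size limit,
# and spilling=True lets the cache spill to disk when memory fills up.
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)

# The cache can back a map op (as here) or a source op (as in basic4 below).
ds1 = ds.ImageFolderDatasetV2(dataset_dir="../data/dataset/testImageNetData/train/")
ds1 = ds1.map(input_columns=["image"], operations=c_vision.Decode(), cache=some_cache)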
tests/ut/python/dataset/test_cache_map.py
@@ -24,6 +24,7 @@ DATA_DIR = "../data/dataset/testImageNetData/train/"
 GENERATE_GOLDEN = False

+
 def test_cache_map_basic1():
     """
     Test mappable leaf with cache op right over the leaf
@@ -104,11 +105,36 @@ def test_cache_map_basic3():
     decode_op = c_vision.Decode()
     ds1 = ds1.repeat(4)
     ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
-    print("ds1.dataset_size is ", ds1.get_dataset_size())
+    logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
     num_iter = 0
     for _ in ds1.create_dict_iterator():
         logger.info("get data from dataset")
         num_iter += 1
     logger.info("Number of data in ds1: {} ".format(num_iter))
     assert num_iter == 8
     logger.info('test_cache_basic3 Ended.\n')
+
+
+def test_cache_map_basic4():
+    """
+    Test different rows result in core dump
+    """
+    logger.info("Test cache basic 4")
+    some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
+
+    # This DATA_DIR only has 2 images in it
+    ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR, cache=some_cache)
+    decode_op = c_vision.Decode()
+    ds1 = ds1.repeat(4)
+    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
+    logger.info("ds1.dataset_size is ", ds1.get_dataset_size())
+    shape = ds1.output_shapes()
+    logger.info(shape)
+
+    num_iter = 0
+    for _ in ds1.create_dict_iterator():
+        print("get data from dataset")
+        logger.info("get data from dataset")
+        num_iter += 1
+
+    logger.info("Number of data in ds1: {} ".format(num_iter))
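A note on the assertion above: DATA_DIR contains only 2 images and the pipeline applies repeat(4), so a full pass yields 2 × 4 = 8 rows, hence assert num_iter == 8 in test_cache_map_basic3. The new test_cache_map_basic4 builds the same shape of pipeline with the cache on the leaf and simply iterates it, since its purpose is to confirm that a row count differing from the cached CRC's expectation no longer causes a core dump.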
@@ -152,12 +178,15 @@ def test_cache_map_failure1():
     assert num_iter == 0
     logger.info('test_cache_failure1 Ended.\n')

+
 if __name__ == '__main__':
     test_cache_map_basic1()
-    print("test_cache_map_basic1 success.")
+    logger.info("test_cache_map_basic1 success.")
     test_cache_map_basic2()
-    print("test_cache_map_basic2 success.")
+    logger.info("test_cache_map_basic2 success.")
     test_cache_map_basic3()
-    print("test_cache_map_basic3 success.")
+    logger.info("test_cache_map_basic3 success.")
+    test_cache_map_basic4()
+    logger.info("test_cache_map_basic3 success.")
     test_cache_map_failure1()
-    print("test_cache_map_failure1 success.")
+    logger.info("test_cache_map_failure1 success.")