Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
475e8584
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
475e8584
编写于
4月 07, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
4月 07, 2020
浏览文件
操作
浏览文件
下载
差异文件
!158 fix: resolve MindDataset hung when field not in index when using block_reader
Merge pull request !158 from guozhijian/fix_block_reader_hung
上级
fe4c815d
c6882656
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
43 addition
and
8 deletion
+43
-8
mindspore/ccsrc/mindrecord/io/shard_reader.cc
mindspore/ccsrc/mindrecord/io/shard_reader.cc
+2
-0
mindspore/mindrecord/filewriter.py
mindspore/mindrecord/filewriter.py
+1
-0
mindspore/mindrecord/tools/cifar100_to_mr.py
mindspore/mindrecord/tools/cifar100_to_mr.py
+6
-3
tests/ut/python/dataset/test_minddataset.py
tests/ut/python/dataset/test_minddataset.py
+24
-3
tests/ut/python/mindrecord/test_cifar100_to_mindrecord.py
tests/ut/python/mindrecord/test_cifar100_to_mindrecord.py
+3
-1
tests/ut/python/mindrecord/test_mindrecord_exception.py
tests/ut/python/mindrecord/test_mindrecord_exception.py
+7
-1
未找到文件。
mindspore/ccsrc/mindrecord/io/shard_reader.cc
浏览文件 @
475e8584
...
...
@@ -785,6 +785,8 @@ vector<std::string> ShardReader::GetAllColumns() {
MSRStatus
ShardReader
::
CreateTasksByBlock
(
const
std
::
vector
<
std
::
tuple
<
int
,
int
,
int
,
uint64_t
>>
&
row_group_summary
,
const
std
::
vector
<
std
::
shared_ptr
<
ShardOperator
>>
&
operators
)
{
vector
<
std
::
string
>
columns
=
GetAllColumns
();
CheckIfColumnInIndex
(
columns
);
for
(
const
auto
&
rg
:
row_group_summary
)
{
auto
shard_id
=
std
::
get
<
0
>
(
rg
);
auto
group_id
=
std
::
get
<
1
>
(
rg
);
...
...
mindspore/mindrecord/filewriter.py
浏览文件 @
475e8584
...
...
@@ -143,6 +143,7 @@ class FileWriter:
ParamTypeError: If index field is invalid.
MRMDefineIndexError: If index field is not primitive type.
MRMAddIndexError: If failed to add index field.
MRMGetMetaError: If the schema is not set or get meta failed.
"""
if
not
index_fields
or
not
isinstance
(
index_fields
,
list
):
raise
ParamTypeError
(
'index_fields'
,
'list'
)
...
...
mindspore/mindrecord/tools/cifar100_to_mr.py
浏览文件 @
475e8584
...
...
@@ -24,7 +24,7 @@ from mindspore import log as logger
from
.cifar100
import
Cifar100
from
..common.exceptions
import
PathNotExistsError
from
..filewriter
import
FileWriter
from
..shardutils
import
check_filename
from
..shardutils
import
check_filename
,
SUCCESS
try
:
cv2
=
import_module
(
"cv2"
)
except
ModuleNotFoundError
:
...
...
@@ -98,8 +98,11 @@ class Cifar100ToMR:
data_list
=
_construct_raw_data
(
images
,
fine_labels
,
coarse_labels
)
test_data_list
=
_construct_raw_data
(
test_images
,
test_fine_labels
,
test_coarse_labels
)
_generate_mindrecord
(
self
.
destination
,
data_list
,
fields
,
"img_train"
)
_generate_mindrecord
(
self
.
destination
+
"_test"
,
test_data_list
,
fields
,
"img_test"
)
if
_generate_mindrecord
(
self
.
destination
,
data_list
,
fields
,
"img_train"
)
!=
SUCCESS
:
return
FAILED
if
_generate_mindrecord
(
self
.
destination
+
"_test"
,
test_data_list
,
fields
,
"img_test"
)
!=
SUCCESS
:
return
FAILED
return
SUCCESS
def
_construct_raw_data
(
images
,
fine_labels
,
coarse_labels
):
"""
...
...
tests/ut/python/dataset/test_minddataset.py
浏览文件 @
475e8584
...
...
@@ -47,7 +47,9 @@ def add_and_remove_cv_file():
os
.
remove
(
"{}.db"
.
format
(
x
))
if
os
.
path
.
exists
(
"{}.db"
.
format
(
x
))
else
None
writer
=
FileWriter
(
CV_FILE_NAME
,
FILES_NUM
)
data
=
get_data
(
CV_DIR_NAME
)
cv_schema_json
=
{
"file_name"
:
{
"type"
:
"string"
},
"label"
:
{
"type"
:
"int32"
},
cv_schema_json
=
{
"id"
:
{
"type"
:
"int32"
},
"file_name"
:
{
"type"
:
"string"
},
"label"
:
{
"type"
:
"int32"
},
"data"
:
{
"type"
:
"bytes"
}}
writer
.
add_schema
(
cv_schema_json
,
"img_schema"
)
writer
.
add_index
([
"file_name"
,
"label"
])
...
...
@@ -226,6 +228,24 @@ def test_cv_minddataset_blockreader_tutorial(add_and_remove_cv_file):
num_iter
+=
1
assert
num_iter
==
20
def
test_cv_minddataset_blockreader_some_field_not_in_index_tutorial
(
add_and_remove_cv_file
):
"""tutorial for cv minddataset."""
columns_list
=
[
"id"
,
"data"
,
"label"
]
num_readers
=
4
data_set
=
ds
.
MindDataset
(
CV_FILE_NAME
+
"0"
,
columns_list
,
num_readers
,
shuffle
=
False
,
block_reader
=
True
)
assert
data_set
.
get_dataset_size
()
==
10
repeat_num
=
2
data_set
=
data_set
.
repeat
(
repeat_num
)
num_iter
=
0
for
item
in
data_set
.
create_dict_iterator
():
logger
.
info
(
"-------------- block reader repeat tow {} -----------------"
.
format
(
num_iter
))
logger
.
info
(
"-------------- item[id]: {} ----------------------------"
.
format
(
item
[
"id"
]))
logger
.
info
(
"-------------- item[label]: {} ----------------------------"
.
format
(
item
[
"label"
]))
logger
.
info
(
"-------------- item[data]: {} -----------------------------"
.
format
(
item
[
"data"
]))
num_iter
+=
1
assert
num_iter
==
20
def
test_cv_minddataset_reader_basic_tutorial
(
add_and_remove_cv_file
):
"""tutorial for cv minderdataset."""
...
...
@@ -359,13 +379,14 @@ def get_data(dir_name):
lines
=
file_reader
.
readlines
()
data_list
=
[]
for
line
in
lines
:
for
i
,
line
in
enumerate
(
lines
)
:
try
:
filename
,
label
=
line
.
split
(
","
)
label
=
label
.
strip
(
"
\n
"
)
with
open
(
os
.
path
.
join
(
img_dir
,
filename
),
"rb"
)
as
file_reader
:
img
=
file_reader
.
read
()
data_json
=
{
"file_name"
:
filename
,
data_json
=
{
"id"
:
i
,
"file_name"
:
filename
,
"data"
:
img
,
"label"
:
int
(
label
)}
data_list
.
append
(
data_json
)
...
...
tests/ut/python/mindrecord/test_cifar100_to_mindrecord.py
浏览文件 @
475e8584
...
...
@@ -18,6 +18,7 @@ import pytest
from
mindspore.mindrecord
import
Cifar100ToMR
from
mindspore.mindrecord
import
FileReader
from
mindspore.mindrecord
import
MRMOpenError
from
mindspore.mindrecord
import
SUCCESS
from
mindspore
import
log
as
logger
CIFAR100_DIR
=
"../data/mindrecord/testCifar100Data"
...
...
@@ -26,7 +27,8 @@ MINDRECORD_FILE = "./cifar100.mindrecord"
def
test_cifar100_to_mindrecord_without_index_fields
():
"""test transform cifar100 dataset to mindrecord without index fields."""
cifar100_transformer
=
Cifar100ToMR
(
CIFAR100_DIR
,
MINDRECORD_FILE
)
cifar100_transformer
.
transform
()
ret
=
cifar100_transformer
.
transform
()
assert
ret
==
SUCCESS
,
"Failed to tranform from cifar100 to mindrecord"
assert
os
.
path
.
exists
(
MINDRECORD_FILE
)
assert
os
.
path
.
exists
(
MINDRECORD_FILE
+
"_test"
)
read
()
...
...
tests/ut/python/mindrecord/test_mindrecord_exception.py
浏览文件 @
475e8584
...
...
@@ -16,7 +16,7 @@
import
os
import
pytest
from
mindspore.mindrecord
import
FileWriter
,
FileReader
,
MindPage
from
mindspore.mindrecord
import
MRMOpenError
,
MRMGenerateIndexError
,
ParamValueError
from
mindspore.mindrecord
import
MRMOpenError
,
MRMGenerateIndexError
,
ParamValueError
,
MRMGetMetaError
from
mindspore
import
log
as
logger
from
utils
import
get_data
...
...
@@ -280,3 +280,9 @@ def test_cv_file_writer_shard_num_greater_than_1000():
with
pytest
.
raises
(
ParamValueError
)
as
err
:
FileWriter
(
CV_FILE_NAME
,
1001
)
assert
'Shard number should between'
in
str
(
err
.
value
)
def
test_add_index_without_add_schema
():
with
pytest
.
raises
(
MRMGetMetaError
)
as
err
:
fw
=
FileWriter
(
CV_FILE_NAME
)
fw
.
add_index
([
"label"
])
assert
'Failed to get meta info'
in
str
(
err
.
value
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录