Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
b829be11
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b829be11
编写于
4月 01, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
4月 01, 2020
浏览文件
操作
浏览文件
下载
差异文件
!37 Quick fix for pre-opensource TFReaderOp issue
Merge pull request !37 from Peilin/peilin-pre-opensource-tfreader-fix
上级
22a9c00b
0ae77bb0
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
51 addition
and
11 deletion
+51
-11
mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
...re/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
+35
-6
mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h
...ore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h
+1
-0
mindspore/dataset/engine/datasets.py
mindspore/dataset/engine/datasets.py
+15
-5
未找到文件。
mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
浏览文件 @
b829be11
...
...
@@ -105,6 +105,7 @@ TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64
data_schema_
(
std
::
move
(
data_schema
)),
filename_index_
(
make_unique
<
StringIndex
>
()),
load_io_block_queue_
(
true
),
load_jagged_connector_
(
true
),
num_rows_
(
0
),
num_rows_per_shard_
(
0
),
equal_rows_per_shard_
(
equal_rows_per_shard
)
{
...
...
@@ -203,6 +204,25 @@ Status TFReaderOp::operator()() {
buffer_id
++
;
RETURN_IF_NOT_OK
(
out_connector_
->
Add
(
0
,
std
::
move
(
fetched_buffer
)));
}
else
{
// user specified number of rows they want, and we read enough rows
//
// IOBlockQueue thread needs to:
// -stop pushing stuff to IOBlockQueue
// -call PostEndOfEpoch (will send EOE)
// -wait for reset
//
// Worker threads need to:
// -stop reading the file they are currently reading and throw it away
// -keep pulling, but dont read other files (eventually skips all IOBlocks and will get EOE)
//
// Master thread needs to:
// -tell IOBlockQueue thread to stop pushing
// -tell worker threads to stop reading the file tey are currently reading
// -keep pulling until EOE
// don't think we need a lock for now
load_jagged_connector_
=
false
;
std
::
unique_lock
<
std
::
mutex
>
lock
(
load_io_block_queue_mutex_
);
load_io_block_queue_
=
false
;
}
...
...
@@ -245,12 +265,14 @@ Status TFReaderOp::WorkerEntry(int32_t worker_id) {
while
(
!
io_block
->
eof
())
{
if
(
!
io_block
->
eoe
())
{
std
::
string
filename
;
RETURN_IF_NOT_OK
(
io_block
->
GetFilename
(
&
filename
,
*
filename_index_
));
int64_t
start_offset
=
io_block
->
GetStartOffset
();
int64_t
end_offset
=
io_block
->
GetEndOffset
();
RETURN_IF_NOT_OK
(
LoadFile
(
filename
,
start_offset
,
end_offset
,
worker_id
));
MS_LOG
(
INFO
)
<<
"TFReader operator worker "
<<
worker_id
<<
" loaded file "
<<
common
::
SafeCStr
(
filename
)
<<
"."
;
if
(
load_jagged_connector_
)
{
std
::
string
filename
;
RETURN_IF_NOT_OK
(
io_block
->
GetFilename
(
&
filename
,
*
filename_index_
));
int64_t
start_offset
=
io_block
->
GetStartOffset
();
int64_t
end_offset
=
io_block
->
GetEndOffset
();
RETURN_IF_NOT_OK
(
LoadFile
(
filename
,
start_offset
,
end_offset
,
worker_id
));
MS_LOG
(
INFO
)
<<
"TFReader operator worker "
<<
worker_id
<<
" loaded file "
<<
filename
<<
"."
;
}
}
else
{
std
::
unique_ptr
<
DataBuffer
>
eoe_buffer
=
mindspore
::
make_unique
<
DataBuffer
>
(
1
,
DataBuffer
::
kDeBFlagEOE
);
RETURN_IF_NOT_OK
(
jagged_buffer_connector_
->
Add
(
worker_id
,
std
::
move
(
eoe_buffer
)));
...
...
@@ -478,6 +500,10 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off
std
::
unique_ptr
<
TensorQTable
>
new_tensor_table
=
make_unique
<
TensorQTable
>
();
while
(
reader
.
peek
()
!=
EOF
)
{
if
(
!
load_jagged_connector_
)
{
break
;
}
// read length
int64_t
record_length
=
0
;
(
void
)
reader
.
read
(
reinterpret_cast
<
char
*>
(
&
record_length
),
static_cast
<
std
::
streamsize
>
(
sizeof
(
int64_t
)));
...
...
@@ -599,6 +625,9 @@ Status TFReaderOp::LoadFeature(const std::unique_ptr<TensorQTable> *tensor_table
// Overrides base class reset method. Cleans up any state info from it's previous execution and
// reinitializes itself so that it can be executed again, as if it was just created.
Status
TFReaderOp
::
Reset
()
{
// start workers first, otherwise IOBlokcs will fall through if workers see it before this is set to true
load_jagged_connector_
=
true
;
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
load_io_block_queue_mutex_
);
load_io_block_queue_
=
true
;
...
...
mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h
浏览文件 @
b829be11
...
...
@@ -369,6 +369,7 @@ class TFReaderOp : public ParallelOp {
std
::
unique_ptr
<
DataSchema
>
data_schema_
;
std
::
unique_ptr
<
StringIndex
>
filename_index_
;
bool
load_io_block_queue_
;
bool
load_jagged_connector_
;
std
::
unique_ptr
<
JaggedConnector
>
jagged_buffer_connector_
;
QueueList
<
std
::
unique_ptr
<
FilenameBlock
>>
io_block_queues_
;
...
...
mindspore/dataset/engine/datasets.py
浏览文件 @
b829be11
...
...
@@ -1906,11 +1906,21 @@ class TFRecordDataset(SourceDataset):
Return:
Number, number of batches.
"""
num_rows
=
TFReaderOp
.
get_num_rows
(
self
.
dataset_files
,
8
,
estimate
)
num_rows
=
get_num_rows
(
num_rows
,
self
.
num_shards
)
if
self
.
num_samples
is
None
:
return
num_rows
return
min
(
self
.
num_samples
,
num_rows
)
if
self
.
_dataset_size
is
None
:
num_rows
=
TFReaderOp
.
get_num_rows
(
self
.
dataset_files
,
8
,
estimate
)
num_rows
=
get_num_rows
(
num_rows
,
self
.
num_shards
)
if
self
.
num_samples
is
None
:
return
num_rows
return
min
(
self
.
num_samples
,
num_rows
)
return
self
.
_dataset_size
# manually set dataset_size as a tempoary solution.
def
set_dataset_size
(
self
,
value
):
logger
.
warning
(
"WARN_DEPRECATED: This method is deprecated. Please use get_dataset_size directly."
)
if
value
>=
0
:
self
.
_dataset_size
=
value
else
:
raise
ValueError
(
'set dataset_size with negative value {}'
.
format
(
value
))
class
ManifestDataset
(
SourceDataset
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录