Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
09cf02c5
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
09cf02c5
编写于
6月 02, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
6月 02, 2020
浏览文件
操作
浏览文件
下载
差异文件
!1735 profiling for minddata and tdt
Merge pull request !1735 from yanghaitao/yht_profiling
上级
6ef32444
dc125985
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
300 addition
and
2 deletion
+300
-2
mindspore/ccsrc/dataset/engine/connector.h
mindspore/ccsrc/dataset/engine/connector.h
+17
-0
mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h
mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h
+16
-0
mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
+54
-1
mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h
mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h
+4
-0
mindspore/ccsrc/dataset/engine/execution_tree.cc
mindspore/ccsrc/dataset/engine/execution_tree.cc
+1
-0
mindspore/ccsrc/dataset/util/CMakeLists.txt
mindspore/ccsrc/dataset/util/CMakeLists.txt
+2
-1
mindspore/ccsrc/dataset/util/profiling.cc
mindspore/ccsrc/dataset/util/profiling.cc
+112
-0
mindspore/ccsrc/dataset/util/profiling.h
mindspore/ccsrc/dataset/util/profiling.h
+92
-0
mindspore/ccsrc/dataset/util/queue.h
mindspore/ccsrc/dataset/util/queue.h
+2
-0
未找到文件。
mindspore/ccsrc/dataset/engine/connector.h
浏览文件 @
09cf02c5
...
...
@@ -152,6 +152,23 @@ class Connector {
return
out
;
}
// Get current size of connector.
// @return the sum of size() over every queue held in queues_
int32_t size() const {
  int32_t total = 0;
  int32_t queue_idx = 0;
  while (queue_idx < queues_.size()) {
    total += queues_[queue_idx]->size();
    ++queue_idx;
  }
  return total;
}
// Get total capacity of connector.
// @return the sum of capacity() over every queue held in queues_
int32_t capacity() const {
  int32_t total = 0;
  int32_t queue_idx = 0;
  while (queue_idx < queues_.size()) {
    total += queues_[queue_idx]->capacity();
    ++queue_idx;
  }
  return total;
}
// Register the internal resources with Task group for interruption service.
// @param vg
// @return
...
...
mindspore/ccsrc/dataset/engine/datasetops/dataset_op.h
浏览文件 @
09cf02c5
...
...
@@ -211,6 +211,22 @@ class DatasetOp : public std::enable_shared_from_this<DatasetOp> {
// @return - the column name map as a string
std
::
string
ColumnNameMapAsString
()
const
;
// Getter function
// @return connector size of current op, as reported by out_connector_->size()
virtual int32_t ConnectorSize() const {
  const int32_t current_size = out_connector_->size();
  return current_size;
}
// Getter function
// @return connector capacity of current op, as reported by out_connector_->capacity()
virtual int32_t ConnectorCapacity() const {
  const int32_t current_capacity = out_connector_->capacity();
  return current_capacity;
}
// Getter function
// @return connector size of child op
int32_t
ChildOpConnectorSize
(
int32_t
child_index
=
0
)
const
{
return
child_
[
child_index
]
->
ConnectorSize
();
}
// Getter function
// @return connector capacity of child op
int32_t
ChildOpConnectorCapacity
(
int32_t
child_index
=
0
)
const
{
return
child_
[
child_index
]
->
ConnectorCapacity
();
}
// Children Getter
// @return Vector of Children (a copy of child_, so the caller shares ownership of each op)
std::vector<std::shared_ptr<DatasetOp>> Children() const {
  std::vector<std::shared_ptr<DatasetOp>> children_copy(child_);
  return children_copy;
}
...
...
mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
浏览文件 @
09cf02c5
...
...
@@ -25,9 +25,13 @@
#include "dataset/util/status.h"
#include "dataset/util/task_manager.h"
#include "dataset/engine/opt/pass.h"
#include "dataset/util/profiling.h"
namespace
mindspore
{
namespace
dataset
{
// Formats one profiling record as the space separated string "type subtype batch_num value".
// The whole expansion is parenthesized so the macro acts as a single expression wherever it is
// used (for example when a member call or a comparison is applied to its result).
#define DEVICE_QUEUE_PROFILING_DATA(type, subtype, batch_num, value) \
  (std::to_string(type) + " " + std::to_string(subtype) + " " + std::to_string(batch_num) + " " + std::to_string(value))
DeviceQueueOp
::
DeviceQueueOp
(
std
::
string
channel_name
,
DeviceType
device_type
,
int32_t
device_id
,
int32_t
prefetch_size
,
int32_t
op_connector_size
,
int64_t
num_batch
)
:
PipelineOp
(
op_connector_size
),
...
...
@@ -97,7 +101,25 @@ Status DeviceQueueOp::SendDataToAscend() {
MS_LOG
(
INFO
)
<<
"Device queue, sending data to Ascend."
;
int64_t
total_batch
=
0
;
bool
is_break_loop
=
false
;
double
batch_start_time
,
tdt_start_time
,
end_time
;
int32_t
batch_cost
,
tdt_cost
;
int32_t
connector_size
=
0
;
int32_t
connector_capacity
;
std
::
shared_ptr
<
Profiling
>
profiling_node
;
bool
isProfilingEnable
=
ProfilingManager
::
GetInstance
().
IsProfilingEnable
();
if
(
isProfilingEnable
)
{
std
::
string
file_name
=
"critical_point_profiling"
;
// Here can determine performance bottleneck is in pipeline or in tdt.
// Context format of this file "type subtype batchnum value"
// type:0: time, 1: queue depth
// subtype:0: pipeline time, 1: push tdt time, 2: all time
// batchnum: batch number
// value: value of time(ms) or queue depth
profiling_node
=
std
::
make_shared
<
Profiling
>
(
file_name
,
device_id_
);
RETURN_IF_NOT_OK
(
ProfilingManager
::
GetInstance
().
RegisterProfilingNode
(
&
profiling_node
));
batch_start_time
=
ProfilingTime
::
GetCurMilliSecond
();
connector_capacity
=
ChildOpConnectorCapacity
();
}
std
::
unique_ptr
<
DataBuffer
>
current_buffer
;
RETURN_IF_NOT_OK
(
GetNextInput
(
&
current_buffer
));
...
...
@@ -107,20 +129,51 @@ Status DeviceQueueOp::SendDataToAscend() {
TensorRow
currRow
;
for
(
int
row_id
=
0
;
row_id
<
current_buffer
->
NumRows
()
&&
!
is_break_loop
;
row_id
++
)
{
RETURN_IF_NOT_OK
(
current_buffer
->
GetRow
(
row_id
,
&
currRow
));
if
(
isProfilingEnable
)
{
tdt_start_time
=
ProfilingTime
::
GetCurMilliSecond
();
}
auto
status
=
tdtInstancePtr
->
hostPush
(
currRow
,
true
,
channel_name_
);
if
(
status
==
TdtStatus
::
FAILED
)
{
return
Status
(
StatusCode
::
kTDTPushFailure
,
"TDT Push Failed"
);
}
if
(
isProfilingEnable
)
{
end_time
=
ProfilingTime
::
GetCurMilliSecond
();
tdt_cost
=
(
int32_t
)(
end_time
-
tdt_start_time
);
// record push tdt time
profiling_node
->
Record
(
DEVICE_QUEUE_PROFILING_DATA
(
TIME
,
TDT_PUSH_TIME
,
total_batch
+
1
,
tdt_cost
));
batch_cost
=
(
int32_t
)(
end_time
-
batch_start_time
);
// record batch time
profiling_node
->
Record
(
DEVICE_QUEUE_PROFILING_DATA
(
TIME
,
BATCH_TIME
,
total_batch
+
1
,
batch_cost
));
// record pipeline time
profiling_node
->
Record
(
DEVICE_QUEUE_PROFILING_DATA
(
TIME
,
PIPELINE_TIME
,
total_batch
+
1
,
batch_cost
-
tdt_cost
));
batch_start_time
=
end_time
;
// record connector depth
profiling_node
->
Record
(
DEVICE_QUEUE_PROFILING_DATA
(
CONNECTOR_DEPTH
,
connector_capacity
,
total_batch
+
1
,
connector_size
));
}
total_batch
++
;
if
(
num_batch_
>
0
&&
total_batch
==
num_batch_
)
{
is_break_loop
=
true
;
}
}
if
(
isProfilingEnable
)
{
connector_size
=
ChildOpConnectorSize
();
connector_capacity
=
ChildOpConnectorCapacity
();
}
RETURN_IF_NOT_OK
(
GetNextInput
(
&
current_buffer
));
}
if
(
isProfilingEnable
)
{
connector_size
=
ChildOpConnectorSize
();
connector_capacity
=
ChildOpConnectorCapacity
();
}
RETURN_IF_NOT_OK
(
GetNextInput
(
&
current_buffer
));
}
if
(
isProfilingEnable
)
{
profiling_node
->
SaveToFile
();
}
MS_LOG
(
INFO
)
<<
"Device queue total batch is "
<<
total_batch
<<
", number of batches is "
<<
num_batch_
<<
"."
;
return
Status
::
OK
();
...
...
mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h
浏览文件 @
09cf02c5
...
...
@@ -124,6 +124,10 @@ class RepeatOp : public PipelineOp {
// @return - Status of the node visit.
Status
Accept
(
NodePass
*
p
,
bool
*
modified
)
override
;
virtual
int32_t
ConnectorSize
()
const
{
return
child_
[
0
]
->
ConnectorSize
();
}
virtual
int32_t
ConnectorCapacity
()
const
{
return
child_
[
0
]
->
ConnectorCapacity
();
}
private:
int32_t
max_repeats_
;
// The number of repeats that the user requested
int32_t
repeat_count_
;
// A counter for the current number of executed repeats
...
...
mindspore/ccsrc/dataset/engine/execution_tree.cc
浏览文件 @
09cf02c5
...
...
@@ -19,6 +19,7 @@
#include "dataset/engine/datasetops/dataset_op.h"
#include "dataset/engine/datasetops/shuffle_op.h"
#include "dataset/util/task_manager.h"
#include "dataset/util/profiling.h"
#include "dataset/engine/opt/util/printer_pass.h"
...
...
mindspore/ccsrc/dataset/util/CMakeLists.txt
浏览文件 @
09cf02c5
...
...
@@ -14,4 +14,5 @@ add_library(utils OBJECT
status.cc
path.cc
wait_post.cc
sig_handler.cc
)
sig_handler.cc
profiling.cc
)
mindspore/ccsrc/dataset/util/profiling.cc
0 → 100644
浏览文件 @
09cf02c5
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dataset/util/profiling.h"
#include <sys/time.h>
#include <cstdlib>
#include <fstream>
#include "dataset/util/path.h"
#include "common/utils.h"
#include "utils/log_adapter.h"
namespace
mindspore
{
namespace
dataset
{
Profiling
::
Profiling
(
const
std
::
string
&
file_name
,
const
int32_t
device_id
)
:
file_name_
(
file_name
),
device_id_
(
device_id
)
{}
// Work out where this node writes its data.
// Reads the MINDDATA_PROFILING_DIR environment variable, canonicalizes it (_fullpath on Windows,
// realpath elsewhere) and stores "<dir>/<file_name_>_<device_id_>.txt" in file_path_.
// @return Status - error when the variable is empty, is PATH_MAX or longer, or cannot be resolved
Status Profiling::Init() {
  std::string profiling_dir = common::GetEnv("MINDDATA_PROFILING_DIR");
  if (profiling_dir.empty()) {
    RETURN_STATUS_UNEXPECTED("Profiling dir is not set.");
  }
  if (profiling_dir.size() >= PATH_MAX) {
    RETURN_STATUS_UNEXPECTED("Profiling dir is invalid.");
  }

  char resolved[PATH_MAX] = {0};
#if defined(_WIN32) || defined(_WIN64)
  const char *canonical = _fullpath(resolved, common::SafeCStr(profiling_dir), PATH_MAX);
#else
  const char *canonical = realpath(common::SafeCStr(profiling_dir), resolved);
#endif
  if (canonical == nullptr) {
    RETURN_STATUS_UNEXPECTED("Profiling dir is invalid.");
  }

  const std::string leaf_name = file_name_ + "_" + std::to_string(device_id_) + ".txt";
  file_path_ = (Path(resolved) / Path(leaf_name)).toString();
  return Status::OK();
}
// Buffer one record in memory; nothing is written to disk until SaveToFile() is called.
// @param data - the record text, stored as one entry of value_
// @return Status - always OK
Status Profiling::Record(const std::string &data) {
  value_.push_back(data);
  return Status::OK();
}
// Append every buffered record to file_path_, one record per line.
// The file is opened in append mode, so the buffer is emptied after a successful write;
// otherwise a later call would append the same records a second time.
// @return Status - error when no file name was set or the file cannot be opened
Status Profiling::SaveToFile() {
  if (file_name_.empty()) {
    RETURN_STATUS_UNEXPECTED("Profiling file name has not been set.");
  }
  std::ofstream handle(file_path_, std::ios::app);
  if (!handle.is_open()) {
    RETURN_STATUS_UNEXPECTED("Profiling file can not be opened.");
  }
  // Iterate by const reference: the records are only read, so no per-record string copy.
  for (const auto &value : value_) {
    handle << value << "\n";
  }
  handle.close();
  // These records are now on disk; drop them so they are not written again.
  value_.clear();
  return Status::OK();
}
// Access the shared ProfilingManager.
// @return reference to a function-local static instance, created on first call
ProfilingManager &ProfilingManager::GetInstance() {
  static ProfilingManager manager;
  return manager;
}
// Check whether profiling is switched on.
// @return true only when the PROFILING_MODE environment variable is exactly "true"
bool ProfilingManager::IsProfilingEnable() const {
  const auto mode = common::GetEnv("PROFILING_MODE");
  return mode == "true";
}
// Initialize a profiling node and keep a shared reference to it in profiling_node_.
// @param node - pointer to the node's shared_ptr; it is dereferenced without a null check
// @return Status - the error from Profiling::Init() if it fails (the node is then not stored)
Status ProfilingManager::RegisterProfilingNode(std::shared_ptr<Profiling> *node) {
  std::shared_ptr<Profiling> &profiler = *node;
  RETURN_IF_NOT_OK(profiler->Init());
  profiling_node_.push_back(profiler);
  return Status::OK();
}
// Write the buffered data of every registered profiling node to its file.
// Does nothing (and returns OK) when profiling is not enabled.
// @return Status - the first error returned by a node's SaveToFile(), otherwise OK
Status ProfilingManager::SaveProfilingData() {
  if (!IsProfilingEnable()) {
    return Status::OK();
  }
  MS_LOG(INFO) << "Start to save profile data.";
  // Bind by const reference so each iteration does not copy the shared_ptr
  // (a copy would cost an atomic reference-count increment and decrement).
  for (const auto &node : profiling_node_) {
    RETURN_IF_NOT_OK(node->SaveToFile());
  }
  MS_LOG(INFO) << "Save profile data end.";
  return Status::OK();
}
// Get the current time of day in milliseconds, as reported by gettimeofday().
// The conversion is done in double: tv_sec * 1000 cannot overflow an integer type where
// time_t is narrow, and the sub-millisecond part of tv_usec is kept instead of truncated.
// If gettimeofday() fails, its result is ignored and tv keeps its zero initial value.
// @return current time in milliseconds (may have a fractional part)
double ProfilingTime::GetCurMilliSecond() {
  struct timeval tv = {0, 0};
  (void)gettimeofday(&tv, nullptr);
  return static_cast<double>(tv.tv_sec) * 1000.0 + static_cast<double>(tv.tv_usec) / 1000.0;
}
}
// namespace dataset
}
// namespace mindspore
mindspore/ccsrc/dataset/util/profiling.h
0 → 100644
浏览文件 @
09cf02c5
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATASET_UTIL_PROFILE_H_
#define DATASET_UTIL_PROFILE_H_
#include <string>
#include <vector>
#include <memory>
#include "dataset/util/status.h"
namespace
mindspore
{
namespace
dataset
{
// Kind of value carried by a profiling record. The numeric value is what
// DEVICE_QUEUE_PROFILING_DATA writes in the "type" column, so it is spelled out here.
enum ProfilingType {
  TIME = 0,             // record holds a time value
  CONNECTOR_DEPTH = 1,  // record holds a connector (queue) depth
};
// Sub-type of a TIME profiling record. The numeric value is what is written in the
// "subtype" column of the profiling file, so it is spelled out here.
enum ProfilingTimeSubType {
  PIPELINE_TIME = 0,  // batch time minus the tdt push time
  TDT_PUSH_TIME = 1,  // time spent pushing to tdt
  BATCH_TIME = 2,     // whole time for the batch
  INVALID_TIME = 3,
};
// Buffers profiling records in memory and appends them to a per-device text file.
class Profiling {
 public:
  // Constructor
  Profiling() = default;

  // Constructor if need save profile data to file
  // @param file_name - base name of the output file
  // @param device_id - device id, appended to the file name by Init()
  Profiling(const std::string &file_name, const int32_t device_id);

  // Destructor
  ~Profiling() = default;

  // Resolve the output file path (file_path_) from the MINDDATA_PROFILING_DIR environment variable
  // @return Status - The error code return
  Status Init();

  // Record profile data
  // @param data - one record, kept in memory until SaveToFile() is called
  // @return Status - The error code return
  Status Record(const std::string &data);

  // Save profile data to file if necessary
  // @return Status - The error code return
  Status SaveToFile();

 private:
  std::vector<std::string> value_;  // buffered records
  std::string file_name_;           // base file name given to the constructor
  std::string file_path_;           // full output path, set by Init()
  // Given an initial value so a default-constructed object never holds an indeterminate id
  // (Init() reads it when building the file name).
  int32_t device_id_{0};
};
// Keeps the list of registered Profiling nodes and saves all of them on request.
// Normally reached through GetInstance().
class ProfilingManager {
 public:
  ProfilingManager() = default;

  ~ProfilingManager() = default;

  // @return reference to the shared manager instance
  static ProfilingManager &GetInstance();

  // Save profile data to file
  // @return Status - The error code return
  Status SaveProfilingData();

  // Register profile node to tree
  // @param node - Profiling node
  // @return Status - The error code return
  Status RegisterProfilingNode(std::shared_ptr<Profiling> *node);

  // @return true when the PROFILING_MODE environment variable is "true"
  bool IsProfilingEnable() const;

 private:
  std::vector<std::shared_ptr<Profiling>> profiling_node_;  // nodes added by RegisterProfilingNode
};
// Time helper used by the profiling code.
class ProfilingTime {
 public:
  // @return the current time in milliseconds
  static double GetCurMilliSecond();
};
}
// namespace dataset
}
// namespace mindspore
#endif
mindspore/ccsrc/dataset/util/queue.h
浏览文件 @
09cf02c5
...
...
@@ -230,6 +230,8 @@ class QueueList {
// Mutable access to the queue stored at position index of queue_list_ (index is not range-checked here).
std::unique_ptr<Queue<T>> &operator[](const int index) {
  auto &queue_slot = queue_list_[index];
  return queue_slot;
}
// Read-only access to the queue stored at position index of queue_list_ (index is not range-checked here).
const std::unique_ptr<Queue<T>> &operator[](const int index) const {
  const auto &queue_slot = queue_list_[index];
  return queue_slot;
}
~
QueueList
()
=
default
;
private:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录