Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
38b4413f
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
38b4413f
编写于
8月 29, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 29, 2020
浏览文件
操作
浏览文件
下载
差异文件
!5386 Fix GPU watchpoints check when multiple conditions set on same node
Merge pull request !5386 from HarshvardhanGupta/fix-gpu-wp-skip
上级
767c4c7f
3226e840
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
16 additions
and
97 deletions
+16
-97
mindspore/ccsrc/debug/debug_services.cc
mindspore/ccsrc/debug/debug_services.cc
+2
-59
mindspore/ccsrc/debug/debug_services.h
mindspore/ccsrc/debug/debug_services.h
+2
-4
mindspore/ccsrc/debug/debugger/debugger.cc
mindspore/ccsrc/debug/debugger/debugger.cc
+11
-32
mindspore/ccsrc/debug/debugger/debugger.h
mindspore/ccsrc/debug/debugger/debugger.h
+1
-2
未找到文件。
mindspore/ccsrc/debug/debug_services.cc
浏览文件 @
38b4413f
...
...
@@ -66,11 +66,9 @@ void DebugServices::RemoveWatchpoint(unsigned int id) {
void
DebugServices
::
CheckWatchpoints
(
std
::
vector
<
std
::
string
>
*
name
,
std
::
vector
<
std
::
string
>
*
slot
,
std
::
vector
<
int
>
*
condition
,
std
::
vector
<
unsigned
int
>
*
watchpoint_id
,
const
std
::
vector
<
std
::
string
>
&
op_overflows
)
{
const
std
::
vector
<
std
::
string
>
&
op_overflows
,
const
std
::
vector
<
std
::
shared_ptr
<
TensorData
>>
&
tensor_list
)
{
std
::
lock_guard
<
std
::
mutex
>
lg
(
lock_
);
std
::
vector
<
std
::
shared_ptr
<
TensorData
>>
tensor_list
=
tensor_loader_
->
GetTensor
();
std
::
string
current_tensor_name
;
std
::
unordered_map
<
unsigned
int
,
watchpoint_t
>
watchpoints_to_check_table
;
const
size_t
location
=
0
;
...
...
@@ -198,61 +196,6 @@ void DebugServices::HandleWatchpointHits(const std::vector<unsigned int> &hit_en
}
}
void
DebugServices
::
CheckSingleWatchpoint
(
std
::
shared_ptr
<
TensorData
>
watchtensor
,
std
::
string
*
name
,
std
::
string
*
slot
,
char
**
data_ptr
,
unsigned
int
*
data_size
,
int
*
condition
,
unsigned
int
*
wacthpoint_id
)
{
std
::
lock_guard
<
std
::
mutex
>
lg
(
lock_
);
std
::
string
current_watchtensor_name
;
current_watchtensor_name
=
watchtensor
->
GetName
();
mindspore
::
tensor
::
TensorPtr
tensor_ptr
=
watchtensor
->
GetTensor
();
int
tensor_data_type
=
tensor_ptr
->
data_type_c
();
watchpoint_t
watchpoint_to_check
;
for
(
auto
w_table_item
:
watchpoint_table
)
{
auto
check_node_list
=
std
::
get
<
1
>
(
w_table_item
).
check_node_list
;
for
(
auto
check_node
:
check_node_list
)
{
std
::
string
w_name
=
std
::
get
<
0
>
(
check_node
);
bool
w_type
=
std
::
get
<
1
>
(
check_node
);
// get current the full info including condition, id..., for current watchtensor
std
::
string
current_node_name
=
current_watchtensor_name
.
substr
(
0
,
current_watchtensor_name
.
find_first_of
(
":"
));
if
((
w_type
==
true
&&
(
current_watchtensor_name
.
find
(
w_name
)
!=
string
::
npos
||
w_name
==
"*"
))
||
(
w_type
==
false
&&
current_node_name
==
w_name
))
{
watchpoint_to_check
=
w_table_item
.
second
;
// need to add support for float16 and float64, and other types when we support conditions beyond inf and nan
if
(
tensor_data_type
!=
kNumberTypeFloat
&&
tensor_data_type
!=
kNumberTypeFloat32
)
{
return
;
}
break
;
}
}
}
float
*
start_addr
=
reinterpret_cast
<
float
*>
(
tensor_ptr
->
data_c
());
unsigned
int
num_elements
=
(
tensor_ptr
->
data
().
nbytes
())
/
sizeof
(
float
);
for
(
unsigned
int
index
=
0
;
index
<
num_elements
;
index
++
)
{
float
x
=
start_addr
[
index
];
if
(((
watchpoint_to_check
.
conditions
.
inf
.
enabled
||
watchpoint_to_check
.
conditions
.
neg_inf
.
enabled
)
&&
isinf
(
x
))
||
(
watchpoint_to_check
.
conditions
.
nan
.
enabled
&&
isnan
(
x
)))
{
std
::
string
name_no_slot
=
current_watchtensor_name
.
substr
(
0
,
current_watchtensor_name
.
find_first_of
(
":"
));
*
name
=
name_no_slot
;
*
slot
=
std
::
to_string
(
watchtensor
->
GetSlot
());
*
data_ptr
=
reinterpret_cast
<
char
*>
(
tensor_ptr
->
data_c
());
*
data_size
=
tensor_ptr
->
data
().
nbytes
();
int
condition_item
=
-
1
;
if
(
watchpoint_to_check
.
conditions
.
nan
.
enabled
)
{
condition_item
=
0
;
}
else
if
(
watchpoint_to_check
.
conditions
.
inf
.
enabled
||
watchpoint_to_check
.
conditions
.
neg_inf
.
enabled
)
{
condition_item
=
1
;
}
*
condition
=
condition_item
;
*
wacthpoint_id
=
watchpoint_to_check
.
id
;
}
}
}
void
DebugServices
::
ReadNodesTensors
(
std
::
vector
<
std
::
string
>
name
,
std
::
vector
<
std
::
string
>
*
ret_name
,
std
::
vector
<
char
*>
*
data_ptr
,
std
::
vector
<
unsigned
int
>
*
data_size
,
std
::
vector
<
TypePtr
>
*
dtype
,
std
::
vector
<
std
::
vector
<
int
>>
*
shape
)
{
...
...
mindspore/ccsrc/debug/debug_services.h
浏览文件 @
38b4413f
...
...
@@ -76,10 +76,8 @@ class DebugServices {
void
RemoveWatchpoint
(
unsigned
int
id
);
void
CheckWatchpoints
(
std
::
vector
<
std
::
string
>
*
name
,
std
::
vector
<
std
::
string
>
*
slot
,
std
::
vector
<
int
>
*
condition
,
std
::
vector
<
unsigned
int
>
*
watchpoint_id
,
const
std
::
vector
<
std
::
string
>
&
op_overflows
);
void
CheckSingleWatchpoint
(
std
::
shared_ptr
<
TensorData
>
watchnode
,
std
::
string
*
name
,
std
::
string
*
slot
,
char
**
data_ptr
,
unsigned
int
*
data_size
,
int
*
condition
,
unsigned
int
*
wacthpoint_id
);
std
::
vector
<
unsigned
int
>
*
watchpoint_id
,
const
std
::
vector
<
std
::
string
>
&
op_overflows
,
const
std
::
vector
<
std
::
shared_ptr
<
TensorData
>>
&
tensor_list
);
void
ReadNodesTensors
(
std
::
vector
<
std
::
string
>
name
,
std
::
vector
<
std
::
string
>
*
ret_name
,
std
::
vector
<
char
*>
*
data_ptr
,
std
::
vector
<
unsigned
int
>
*
data_size
,
...
...
mindspore/ccsrc/debug/debugger/debugger.cc
浏览文件 @
38b4413f
...
...
@@ -254,7 +254,7 @@ void Debugger::PostExecuteNode() {
// if the kernel is a watchpoint and it gets hit, suspend.
if
(
is_watchpoint
)
{
auto
hits
=
Check
SingleWatchpoint
(
cur_name_
);
auto
hits
=
Check
Watchpoints
(
cur_name_
);
if
(
!
hits
.
empty
())
{
SendWatchpointsAndSuspend
(
hits
);
}
...
...
@@ -547,7 +547,7 @@ void Debugger::Exit() {
std
::
exit
(
EXIT_FAILURE
);
}
std
::
list
<
WatchpointHit
>
Debugger
::
CheckWatchpoints
()
{
std
::
list
<
WatchpointHit
>
Debugger
::
CheckWatchpoints
(
const
std
::
string
&
watchnode
)
{
std
::
vector
<
std
::
string
>
name
;
std
::
vector
<
std
::
string
>
slot
;
std
::
vector
<
int
>
condition
;
...
...
@@ -556,7 +556,15 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints() {
#ifdef ENABLE_D
overflow_ops
=
CheckOpOverflow
();
#endif
debug_services_
->
CheckWatchpoints
(
&
name
,
&
slot
,
&
condition
,
&
watchpoint_id
,
overflow_ops
);
auto
tensor_loader
=
debug_services_
->
tensor_loader
();
std
::
vector
<
std
::
shared_ptr
<
TensorData
>>
tensor_list
;
if
(
watchnode
.
empty
())
{
tensor_list
=
tensor_loader
->
GetTensor
();
}
else
{
tensor_list
=
tensor_loader
->
GetNodeTensorMap
(
watchnode
);
}
debug_services_
->
CheckWatchpoints
(
&
name
,
&
slot
,
&
condition
,
&
watchpoint_id
,
overflow_ops
,
tensor_list
);
std
::
list
<
WatchpointHit
>
hits
;
for
(
unsigned
int
i
=
0
;
i
<
name
.
size
();
i
++
)
{
WatchpointHit
hit
;
...
...
@@ -576,35 +584,6 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints() {
return
hits
;
}
std
::
list
<
WatchpointHit
>
Debugger
::
CheckSingleWatchpoint
(
std
::
string
watchnode
)
const
{
auto
tensor_loader
=
debug_services_
->
tensor_loader
();
auto
tensors
=
tensor_loader
->
GetNodeTensorMap
(
watchnode
);
std
::
list
<
WatchpointHit
>
hits
;
for
(
std
::
vector
<
std
::
shared_ptr
<
TensorData
>>::
iterator
it
=
tensors
.
begin
();
it
!=
tensors
.
end
();
++
it
)
{
auto
cur_tensor
=
*
it
;
std
::
string
name
=
""
;
std
::
string
slot
=
""
;
char
*
data_ptr
=
nullptr
;
unsigned
int
data_size
=
0
;
int
condition
=
-
1
;
unsigned
int
watchpoint_id
=
-
1
;
WatchpointHit
hit
;
debug_services_
->
CheckSingleWatchpoint
(
cur_tensor
,
&
name
,
&
slot
,
&
data_ptr
,
&
data_size
,
&
condition
,
&
watchpoint_id
);
if
(
name
!=
""
)
{
hit
.
set_id
(
watchpoint_id
);
// here TensorProto act as a tensor indicator, not sending tensor content
TensorProto
*
tensor_item
=
hit
.
mutable_tensor
();
tensor_item
->
set_node_name
(
name
);
tensor_item
->
set_slot
(
slot
);
tensor_item
->
set_finished
(
true
);
WatchCondition
*
condition_item
=
hit
.
mutable_watch_condition
();
condition_item
->
set_condition
(
debugger
::
WatchCondition_Condition
(
condition
));
hits
.
push_back
(
hit
);
}
}
return
hits
;
}
void
Debugger
::
SendWatchpointsAndSuspend
(
const
std
::
list
<
WatchpointHit
>
&
points
)
{
// send info about watchpoint
if
(
!
points
.
empty
())
{
...
...
mindspore/ccsrc/debug/debugger/debugger.h
浏览文件 @
38b4413f
...
...
@@ -137,8 +137,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// analyze tensors and check watchpoint conditions
// return names of tensors and what condition they hit
std
::
list
<
WatchpointHit
>
CheckWatchpoints
();
std
::
list
<
WatchpointHit
>
CheckSingleWatchpoint
(
std
::
string
watchnode
)
const
;
std
::
list
<
WatchpointHit
>
CheckWatchpoints
(
const
std
::
string
&
watchnode
=
std
::
string
());
// send watchpoints that hit and enter command wait loop
void
SendWatchpointsAndSuspend
(
const
std
::
list
<
WatchpointHit
>
&
points
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录