Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
7b10f21f
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7b10f21f
编写于
8月 26, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 26, 2020
浏览文件
操作
浏览文件
下载
差异文件
!5161 Fix the defunct process issue when run 8P case in GPU mode.
Merge pull request !5161 from ZhangQinghua/master
上级
5d4e67ee
be1c49ee
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
68 addition
and
21 deletion
+68
-21
mindspore/ccsrc/common/duplex_pipe.cc
mindspore/ccsrc/common/duplex_pipe.cc
+52
-6
mindspore/ccsrc/common/duplex_pipe.h
mindspore/ccsrc/common/duplex_pipe.h
+14
-13
mindspore/ccsrc/common/duplex_pipe_win.cc
mindspore/ccsrc/common/duplex_pipe_win.cc
+2
-2
未找到文件。
mindspore/ccsrc/common/duplex_pipe.cc
浏览文件 @
7b10f21f
...
...
@@ -16,7 +16,7 @@
#include "common/duplex_pipe.h"
#include <s
ignal
.h>
#include <s
ys/wait
.h>
#include <iostream>
#include <vector>
#include <algorithm>
...
...
@@ -70,6 +70,8 @@ int DuplexPipe::Open(std::initializer_list<std::string> arg_list, bool append_fd
local_stderr_
=
dup
(
STDERR_FILENO
);
close
(
fd1_
[
0
]);
close
(
fd2_
[
1
]);
signal_handler_
=
std
::
make_shared
<
SignalHandler
>
(
shared_from_this
(),
pid_
);
}
return
0
;
}
...
...
@@ -147,14 +149,58 @@ void DuplexPipe::Close() {
close
(
fd2_
[
1
]);
}
void
DuplexPipe
::
Alarm
::
Set
(
std
::
shared_ptr
<
DuplexPipe
>
dp
,
unsigned
int
interval_secs
)
{
DuplexPipe
::
SignalHandler
::
SignalHandler
(
std
::
shared_ptr
<
DuplexPipe
>
dp
,
pid_t
pid
)
{
dp_
=
dp
;
signal
(
SIGALRM
,
SigHandler
);
child_pid_
=
pid
;
signal
(
SIGCHLD
,
SigChildHandler
);
signal
(
SIGPIPE
,
SigPipeHandler
);
}
// Destructor: clears the handler's static reference (dp_) to the DuplexPipe so
// that signal handlers running afterwards no longer reach it through dp_.
DuplexPipe::SignalHandler::~SignalHandler() { dp_.reset(); }
// Arms the time-out alarm.
// Installs SigAlarmHandler as the SIGALRM handler, then asks the kernel to
// deliver SIGALRM after `interval_secs` seconds.
void DuplexPipe::SignalHandler::SetAlarm(unsigned int interval_secs) {
  signal(SIGALRM, SigAlarmHandler);
  alarm(interval_secs);
}
void
DuplexPipe
::
Alarm
::
Cancel
()
{
alarm
(
0
);
dp_
.
reset
();
// Disarms the time-out alarm: alarm(0) cancels any pending SIGALRM request.
void DuplexPipe::SignalHandler::CancelAlarm() { alarm(0); }
void
DuplexPipe
::
SignalHandler
::
SigAlarmHandler
(
int
sig
)
{
DP_INFO
<<
"Signal: "
<<
sig
<<
", child_pid_: "
<<
child_pid_
;
if
(
!
dp_
.
expired
())
{
dp_
.
lock
()
->
TimeOut
();
}
}
void
DuplexPipe
::
SignalHandler
::
SigPipeHandler
(
int
sig
)
{
DP_INFO
<<
"Signal: "
<<
sig
;
if
(
!
dp_
.
expired
())
{
dp_
.
lock
()
->
Close
();
}
}
void
DuplexPipe
::
SignalHandler
::
SigChildHandler
(
int
sig
)
{
DP_INFO
<<
"Signal: "
<<
sig
<<
", child_pid_: "
<<
child_pid_
;
int
status
;
auto
pid
=
waitpid
(
child_pid_
,
&
status
,
WNOHANG
|
WUNTRACED
);
if
(
WIFEXITED
(
status
))
{
DP_ERROR
<<
"Child exited, status: "
<<
WEXITSTATUS
(
status
)
<<
", pid: "
<<
pid
;
if
(
!
dp_
.
expired
())
{
dp_
.
lock
()
->
Close
();
}
// When run multiple processes by 'mpirun',
// parent process never quit even Exception happens,
// which caused by MPI_Finalize() never returned.
exit
(
-
1
);
}
else
if
(
WIFSTOPPED
(
status
))
{
DP_ERROR
<<
"Child stopped, sig: "
<<
WSTOPSIG
(
status
)
<<
", pid: "
<<
pid
;
}
else
if
(
WIFSIGNALED
(
status
))
{
DP_INFO
<<
"Child not exited, signaled, sig: "
<<
WTERMSIG
(
status
)
<<
", pid: "
<<
pid
;
}
else
if
(
WIFCONTINUED
(
status
))
{
DP_INFO
<<
"Child continued, pid: "
<<
pid
;
}
else
{
DP_ERROR
<<
"Wrong child status: "
<<
status
<<
", pid: "
<<
pid
;
}
}
}
// namespace mindspore
mindspore/ccsrc/common/duplex_pipe.h
浏览文件 @
7b10f21f
...
...
@@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_COMMON_DUPLEX_PIPE_H_
#include <unistd.h>
#include <signal.h>
#include <string>
#include <memory>
#include <initializer_list>
...
...
@@ -61,8 +62,8 @@ class DuplexPipe : public std::enable_shared_from_this<mindspore::DuplexPipe> {
DuplexPipe
&
operator
>>
(
std
::
string
&
buf
);
private:
void
SetTimeOut
()
{
alarm_
.
Set
(
shared_from_this
(),
time_out_secs_
);
}
void
CancelTimeOut
()
{
alarm_
.
Cancel
();
}
void
SetTimeOut
()
{
signal_handler_
->
SetAlarm
(
time_out_secs_
);
}
void
CancelTimeOut
()
{
signal_handler_
->
CancelAlarm
();
}
void
TimeOut
()
{
if
(
has_time_out_callback_
)
{
time_out_callback_
();
...
...
@@ -96,27 +97,27 @@ class DuplexPipe : public std::enable_shared_from_this<mindspore::DuplexPipe> {
int
remote_stdout_
;
int
remote_stderr_
;
class
Alarm
{
class
SignalHandler
{
public:
Alarm
()
=
default
;
~
Alarm
()
=
default
;
SignalHandler
(
std
::
shared_ptr
<
DuplexPipe
>
dp
,
pid_t
pid
)
;
~
SignalHandler
()
;
void
Set
(
std
::
shared_ptr
<
DuplexPipe
>
dp
,
unsigned
int
interval_secs
);
void
Cancel
();
void
Set
Alarm
(
unsigned
int
interval_secs
);
void
Cancel
Alarm
();
private:
static
void
SigHandler
(
int
sig
)
{
DP_INFO
<<
"Signal: "
<<
sig
;
dp_
->
TimeOut
();
}
static
void
SigAlarmHandler
(
int
sig
);
static
void
SigPipeHandler
(
int
sig
);
static
void
SigChildHandler
(
int
sig
);
inline
static
std
::
shared_ptr
<
DuplexPipe
>
dp_
;
inline
static
std
::
weak_ptr
<
DuplexPipe
>
dp_
;
inline
static
pid_t
child_pid_
;
};
unsigned
int
time_out_secs_
=
kTimeOutSeconds
;
bool
has_time_out_callback_
=
false
;
std
::
function
<
void
()
>
time_out_callback_
;
Alarm
alarm
_
;
std
::
shared_ptr
<
SignalHandler
>
signal_handler
_
;
};
}
// namespace mindspore
...
...
mindspore/ccsrc/common/duplex_pipe_win.cc
浏览文件 @
7b10f21f
...
...
@@ -40,9 +40,9 @@ DuplexPipe &DuplexPipe::operator>>(std::string &buf) { DP_EXCEPTION << "Not supp
// Windows stub: DuplexPipe is not implemented on this platform, so closing
// the pipe only reports that through DP_EXCEPTION.
void DuplexPipe::Close() { DP_EXCEPTION << "Not support for Windows by now."; }
void
DuplexPipe
::
Alarm
::
Set
(
std
::
shared_ptr
<
DuplexPipe
>
dp
,
unsigned
int
interval_secs
)
{
// Windows stub: no alarm is armed; `interval_secs` is ignored and the call
// only reports the missing platform support through DP_EXCEPTION.
void DuplexPipe::SignalHandler::SetAlarm(unsigned int interval_secs) {
  DP_EXCEPTION << "Not support for Windows by now.";
}
void
DuplexPipe
::
Alarm
::
Cancel
()
{
DP_EXCEPTION
<<
"Not support for Windows by now."
;
}
// Windows stub: there is no alarm to cancel; the call only reports the
// missing platform support through DP_EXCEPTION.
void DuplexPipe::SignalHandler::CancelAlarm() {
  DP_EXCEPTION << "Not support for Windows by now.";
}
}
// namespace mindspore
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录