Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
e92f0388
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e92f0388
编写于
8月 13, 2021
作者:
B
Baibaifan
提交者:
GitHub
8月 13, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add retry for gethostbyname (#34855)
上级
2164ad61
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
39 addition
and
2 deletion
+39
-2
paddle/fluid/operators/collective/gen_hccl_id_op_helper.cc
paddle/fluid/operators/collective/gen_hccl_id_op_helper.cc
+11
-1
paddle/fluid/platform/flags.cc
paddle/fluid/platform/flags.cc
+14
-0
paddle/fluid/platform/gen_comm_id_helper.cc
paddle/fluid/platform/gen_comm_id_helper.cc
+12
-1
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+2
-0
未找到文件。
paddle/fluid/operators/collective/gen_hccl_id_op_helper.cc
浏览文件 @
e92f0388
...
@@ -34,6 +34,8 @@ limitations under the License. */
...
@@ -34,6 +34,8 @@ limitations under the License. */
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#endif
#endif
DECLARE_int32
(
get_host_by_name_time
);
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -226,7 +228,15 @@ static int ConnectAddr(const std::string& ep, const char* head) {
...
@@ -226,7 +228,15 @@ static int ConnectAddr(const std::string& ep, const char* head) {
char
*
ip
=
NULL
;
char
*
ip
=
NULL
;
struct
hostent
*
hp
=
NULL
;
struct
hostent
*
hp
=
NULL
;
// Retry gethostbyname every 2 seconds, for up to get_host_by_name_time seconds in total.
for
(
int
i
=
0
;
2
*
i
<
FLAGS_get_host_by_name_time
;
i
++
)
{
hp
=
gethostbyname
(
host
.
c_str
());
hp
=
gethostbyname
(
host
.
c_str
());
if
(
hp
!=
NULL
)
{
break
;
}
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
seconds
(
2
));
LOG
(
WARNING
)
<<
"gethostbyname "
<<
host
.
c_str
()
<<
" error!"
;
}
PADDLE_ENFORCE_NOT_NULL
(
hp
,
platform
::
errors
::
InvalidArgument
(
PADDLE_ENFORCE_NOT_NULL
(
hp
,
platform
::
errors
::
InvalidArgument
(
"Fail to get host by name %s."
,
host
));
"Fail to get host by name %s."
,
host
));
...
...
paddle/fluid/platform/flags.cc
浏览文件 @
e92f0388
...
@@ -606,3 +606,17 @@ DEFINE_bool(check_kernel_launch, false,
...
@@ -606,3 +606,17 @@ DEFINE_bool(check_kernel_launch, false,
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
DEFINE_bool
(
conv2d_disable_cudnn
,
false
,
"Disable cudnn in conv2d"
);
DEFINE_bool
(
conv2d_disable_cudnn
,
false
,
"Disable cudnn in conv2d"
);
#endif
#endif
/**
* Distributed related FLAG
* Name: FLAGS_get_host_by_name_time
* Since Version: 2.2.0
* Value Range: int32, default=120
* Example:
* Note: Maximum total time, in seconds, to keep retrying gethostbyname (one attempt every 2 seconds) before failing.
*/
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_XPU) || \
defined(PADDLE_WITH_ASCEND_CL) || defined(PADDLE_WITH_HIP)
DEFINE_int32
(
get_host_by_name_time
,
120
,
"The maximum time for get host by name time"
);
#endif
paddle/fluid/platform/gen_comm_id_helper.cc
浏览文件 @
e92f0388
...
@@ -37,6 +37,8 @@ limitations under the License. */
...
@@ -37,6 +37,8 @@ limitations under the License. */
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/collective_helper.h"
#endif
#endif
DECLARE_int32
(
get_host_by_name_time
);
namespace
paddle
{
namespace
paddle
{
namespace
platform
{
namespace
platform
{
...
@@ -236,7 +238,16 @@ static int ConnectAddr(const std::string& ep, const CommHead head) {
...
@@ -236,7 +238,16 @@ static int ConnectAddr(const std::string& ep, const CommHead head) {
char
*
ip
=
NULL
;
char
*
ip
=
NULL
;
struct
hostent
*
hp
=
NULL
;
struct
hostent
*
hp
=
NULL
;
// Retry gethostbyname every 2 seconds, for up to get_host_by_name_time seconds in total.
for
(
int
i
=
0
;
2
*
i
<
FLAGS_get_host_by_name_time
;
i
++
)
{
hp
=
gethostbyname
(
host
.
c_str
());
hp
=
gethostbyname
(
host
.
c_str
());
if
(
hp
!=
NULL
)
{
break
;
}
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
seconds
(
2
));
LOG
(
WARNING
)
<<
"gethostbyname "
<<
host
.
c_str
()
<<
" error!"
;
}
PADDLE_ENFORCE_NOT_NULL
(
hp
,
platform
::
errors
::
InvalidArgument
(
PADDLE_ENFORCE_NOT_NULL
(
hp
,
platform
::
errors
::
InvalidArgument
(
"Fail to get host by name %s."
,
host
));
"Fail to get host by name %s."
,
host
));
...
...
python/paddle/fluid/__init__.py
浏览文件 @
e92f0388
...
@@ -236,6 +236,7 @@ def __bootstrap__():
...
@@ -236,6 +236,7 @@ def __bootstrap__():
'local_exe_sub_scope_limit'
,
'local_exe_sub_scope_limit'
,
'gpu_memory_limit_mb'
,
'gpu_memory_limit_mb'
,
'conv2d_disable_cudnn'
,
'conv2d_disable_cudnn'
,
'get_host_by_name_time'
,
]
]
if
core
.
is_compiled_with_npu
():
if
core
.
is_compiled_with_npu
():
...
@@ -246,6 +247,7 @@ def __bootstrap__():
...
@@ -246,6 +247,7 @@ def __bootstrap__():
'reallocate_gpu_memory_in_mb'
,
'reallocate_gpu_memory_in_mb'
,
'gpu_memory_limit_mb'
,
'gpu_memory_limit_mb'
,
'npu_config_path'
,
'npu_config_path'
,
'get_host_by_name_time'
,
]
]
core
.
init_gflags
([
"--tryfromenv="
+
","
.
join
(
read_env_flags
)])
core
.
init_gflags
([
"--tryfromenv="
+
","
.
join
(
read_env_flags
)])
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录