Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
07741593
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
07741593
编写于
4月 01, 2021
作者:
K
kuizhiqing
提交者:
GitHub
4月 01, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
new group (#31682)
* new group * ci compatible fix * assert nccl
上级
dbeb3ea4
变更
15
展开全部
显示空白变更内容
内联
并排
Showing
15 changed file
with
670 addition
and
154 deletion
+670
-154
paddle/fluid/imperative/bkcl_context.cc
paddle/fluid/imperative/bkcl_context.cc
+25
-4
paddle/fluid/imperative/bkcl_context.h
paddle/fluid/imperative/bkcl_context.h
+2
-0
paddle/fluid/imperative/nccl_context.cc
paddle/fluid/imperative/nccl_context.cc
+24
-0
paddle/fluid/imperative/nccl_context.h
paddle/fluid/imperative/nccl_context.h
+2
-0
paddle/fluid/imperative/parallel_context.h
paddle/fluid/imperative/parallel_context.h
+2
-0
paddle/fluid/operators/collective/c_sync_calc_stream_op.cc
paddle/fluid/operators/collective/c_sync_calc_stream_op.cc
+38
-32
paddle/fluid/operators/collective/c_sync_comm_stream_op.cc
paddle/fluid/operators/collective/c_sync_comm_stream_op.cc
+41
-33
paddle/fluid/pybind/imperative.cc
paddle/fluid/pybind/imperative.cc
+8
-2
paddle/fluid/pybind/op_function_generator.cc
paddle/fluid/pybind/op_function_generator.cc
+2
-0
python/paddle/distributed/collective.py
python/paddle/distributed/collective.py
+328
-83
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+5
-0
python/paddle/fluid/tests/unittests/collective_allreduce_new_group_api.py
...uid/tests/unittests/collective_allreduce_new_group_api.py
+56
-0
python/paddle/fluid/tests/unittests/new_group.py
python/paddle/fluid/tests/unittests/new_group.py
+83
-0
python/paddle/fluid/tests/unittests/test_new_group.sh
python/paddle/fluid/tests/unittests/test_new_group.sh
+19
-0
python/paddle/fluid/tests/unittests/test_new_group_api.py
python/paddle/fluid/tests/unittests/test_new_group_api.py
+35
-0
未找到文件。
paddle/fluid/imperative/bkcl_context.cc
浏览文件 @
07741593
...
@@ -19,12 +19,11 @@
...
@@ -19,12 +19,11 @@
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/bkcl_helper.h"
#include "paddle/fluid/platform/bkcl_helper.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/gen_comm_id_helper.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/gen_comm_id_helper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"
#include "paddle/fluid/string/split.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/fluid/string/string_helper.h"
...
@@ -77,7 +76,7 @@ void BKCLParallelContext::Init() {
...
@@ -77,7 +76,7 @@ void BKCLParallelContext::Init() {
bkcl_ids
.
resize
(
strategy_
.
nrings_
);
bkcl_ids
.
resize
(
strategy_
.
nrings_
);
if
(
strategy_
.
local_rank_
==
0
)
{
if
(
strategy_
.
local_rank_
==
0
)
{
// generate the unique
nc
clid on the root worker
// generate the unique
bk
clid on the root worker
for
(
size_t
i
=
0
;
i
<
bkcl_ids
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
bkcl_ids
.
size
();
++
i
)
{
auto
ret
=
bkcl_get_unique_id
(
&
bkcl_ids
[
i
]);
auto
ret
=
bkcl_get_unique_id
(
&
bkcl_ids
[
i
]);
PADDLE_ENFORCE_EQ
(
BKCL_SUCCESS
,
ret
,
PADDLE_ENFORCE_EQ
(
BKCL_SUCCESS
,
ret
,
...
@@ -99,6 +98,28 @@ void BKCLParallelContext::Init() {
...
@@ -99,6 +98,28 @@ void BKCLParallelContext::Init() {
}
}
}
}
void
BKCLParallelContext
::
InitWithRingID
(
int
ring_id
)
{
std
::
vector
<
BKCLUniqueId
>
bkcl_ids
;
bkcl_ids
.
resize
(
1
);
if
(
strategy_
.
local_rank_
==
0
)
{
// generate the unique bkclid on the root worker
auto
ret
=
bkcl_get_unique_id
(
&
bkcl_ids
[
0
]);
PADDLE_ENFORCE_EQ
(
BKCL_SUCCESS
,
ret
,
platform
::
errors
::
PreconditionNotMet
(
"BKCL get unique id failed [%d]"
,
ret
));
}
BcastBKCLId
(
bkcl_ids
,
0
);
int
xpu_id
=
BOOST_GET_CONST
(
platform
::
XPUPlace
,
place_
).
device
;
VLOG
(
0
)
<<
"init BKCL context nranks: "
<<
strategy_
.
nranks_
<<
" local rank: "
<<
strategy_
.
local_rank_
<<
" xpu id: "
<<
xpu_id
<<
" ring id: "
<<
ring_id
;
// it will assign bkcl_comm in XPUDeviceContext within ring_id
platform
::
BKCLCommContext
::
Instance
().
CreateBKCLComm
(
&
bkcl_ids
[
0
],
strategy_
.
nranks_
,
strategy_
.
local_rank_
,
xpu_id
,
ring_id
);
}
void
BKCLParallelContext
::
AllReduceByStream
(
const
framework
::
Variable
&
src
,
void
BKCLParallelContext
::
AllReduceByStream
(
const
framework
::
Variable
&
src
,
framework
::
Variable
*
dst
,
framework
::
Variable
*
dst
,
int
ring_id
,
bool
use_calc_stream
)
{
int
ring_id
,
bool
use_calc_stream
)
{
...
...
paddle/fluid/imperative/bkcl_context.h
浏览文件 @
07741593
...
@@ -36,6 +36,8 @@ class BKCLParallelContext : public ParallelContext {
...
@@ -36,6 +36,8 @@ class BKCLParallelContext : public ParallelContext {
void
Init
()
override
;
void
Init
()
override
;
void
InitWithRingID
(
int
ring_id
)
override
;
void
AllReduceByStream
(
const
framework
::
Variable
&
src
,
void
AllReduceByStream
(
const
framework
::
Variable
&
src
,
framework
::
Variable
*
dst
,
int
ring_id
,
framework
::
Variable
*
dst
,
int
ring_id
,
bool
use_calc_stream
)
override
;
bool
use_calc_stream
)
override
;
...
...
paddle/fluid/imperative/nccl_context.cc
浏览文件 @
07741593
...
@@ -79,6 +79,30 @@ void NCCLParallelContext::Init() {
...
@@ -79,6 +79,30 @@ void NCCLParallelContext::Init() {
}
}
}
}
void
NCCLParallelContext
::
InitWithRingID
(
int
ring_id
)
{
std
::
vector
<
ncclUniqueId
>
nccl_ids
;
nccl_ids
.
resize
(
1
);
if
(
strategy_
.
local_rank_
==
0
)
{
// generate the unique ncclid on the root worker
platform
::
dynload
::
ncclGetUniqueId
(
&
nccl_ids
[
0
]);
}
BcastNCCLId
(
nccl_ids
,
0
);
int
gpu_id
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place_
).
device
;
VLOG
(
0
)
<<
"init nccl context nranks: "
<<
strategy_
.
nranks_
<<
" local rank: "
<<
strategy_
.
local_rank_
<<
" gpu id: "
<<
gpu_id
<<
" ring id: "
<<
ring_id
;
// it will assign nccl_comm in CUDADeviceContext within ring_id
platform
::
NCCLCommContext
::
Instance
().
CreateNCCLComm
(
&
nccl_ids
[
0
],
strategy_
.
nranks_
,
strategy_
.
local_rank_
,
gpu_id
,
ring_id
);
compute_events_
.
emplace_back
(
platform
::
CudaEventResourcePool
::
Instance
().
New
(
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place_
).
device
));
comm_events_
.
emplace_back
(
platform
::
CudaEventResourcePool
::
Instance
().
New
(
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place_
).
device
));
}
void
NCCLParallelContext
::
AllReduceByStream
(
const
framework
::
Variable
&
src
,
void
NCCLParallelContext
::
AllReduceByStream
(
const
framework
::
Variable
&
src
,
framework
::
Variable
*
dst
,
framework
::
Variable
*
dst
,
int
ring_id
,
bool
use_calc_stream
)
{
int
ring_id
,
bool
use_calc_stream
)
{
...
...
paddle/fluid/imperative/nccl_context.h
浏览文件 @
07741593
...
@@ -53,6 +53,8 @@ class NCCLParallelContext : public ParallelContext {
...
@@ -53,6 +53,8 @@ class NCCLParallelContext : public ParallelContext {
void
Init
()
override
;
void
Init
()
override
;
void
InitWithRingID
(
int
ring_id
)
override
;
void
AllReduceByStream
(
const
framework
::
Variable
&
src
,
void
AllReduceByStream
(
const
framework
::
Variable
&
src
,
framework
::
Variable
*
dst
,
int
ring_id
,
framework
::
Variable
*
dst
,
int
ring_id
,
bool
use_calc_stream
)
override
;
bool
use_calc_stream
)
override
;
...
...
paddle/fluid/imperative/parallel_context.h
浏览文件 @
07741593
...
@@ -50,6 +50,8 @@ class ParallelContext {
...
@@ -50,6 +50,8 @@ class ParallelContext {
virtual
void
Init
()
=
0
;
virtual
void
Init
()
=
0
;
virtual
void
InitWithRingID
(
int
ring_id
)
=
0
;
virtual
void
AllReduceByStream
(
const
framework
::
Variable
&
src
,
virtual
void
AllReduceByStream
(
const
framework
::
Variable
&
src
,
framework
::
Variable
*
dst
,
int
ring_id
,
framework
::
Variable
*
dst
,
int
ring_id
,
bool
use_calc_stream
)
=
0
;
bool
use_calc_stream
)
=
0
;
...
...
paddle/fluid/operators/collective/c_sync_calc_stream_op.cc
浏览文件 @
07741593
...
@@ -15,40 +15,20 @@ limitations under the License. */
...
@@ -15,40 +15,20 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
framework
{
class
Scope
;
}
// namespace framework
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
class
CSyncCalcStreamOp
:
public
framework
::
Operator
Base
{
class
CSyncCalcStreamOp
:
public
framework
::
Operator
WithKernel
{
public:
public:
CSyncCalcStreamOp
(
const
std
::
string
&
type
,
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{}
const
framework
::
AttributeMap
&
attrs
)
:
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
)
{}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
platform
::
Place
&
place
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
proto
::
VarType
::
FP32
,
PADDLE_ENFORCE_EQ
(
is_gpu_place
(
place
),
true
,
ctx
.
GetPlace
());
platform
::
errors
::
PreconditionNotMet
(
"Sync stream op can run on gpu place only for now."
));
#if (defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)) && !defined(_WIN32)
auto
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_CUDA_SUCCESS
(
hipStreamSynchronize
(
dev_ctx
->
stream
()));
#else
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamSynchronize
(
dev_ctx
->
stream
()));
#endif
#else
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"PaddlePaddle should compile with GPU."
));
#endif
}
}
};
};
...
@@ -65,10 +45,36 @@ Call calculation stream synchronization.
...
@@ -65,10 +45,36 @@ Call calculation stream synchronization.
}
}
};
};
template
<
typename
T
>
class
CSyncCalcStreamCudaKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
#if (defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)) && !defined(_WIN32)
auto
place
=
ctx
.
GetPlace
();
auto
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_CUDA_SUCCESS
(
hipStreamSynchronize
(
dev_ctx
->
stream
()));
#else
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamSynchronize
(
dev_ctx
->
stream
()));
#endif
#else
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"PaddlePaddle should compile with GPU."
));
#endif
}
};
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
ERATOR
(
c_sync_calc_stream
,
ops
::
CSyncCalcStreamOp
,
REGISTER_OP
_WITHOUT_GRADIENT
(
c_sync_calc_stream
,
ops
::
CSyncCalcStreamOp
,
ops
::
CSyncCalcStreamOpMaker
);
ops
::
CSyncCalcStreamOpMaker
);
REGISTER_OP_CUDA_KERNEL
(
c_sync_calc_stream
,
ops
::
CSyncCalcStreamCudaKernel
<
float
>
);
paddle/fluid/operators/collective/c_sync_comm_stream_op.cc
浏览文件 @
07741593
...
@@ -14,45 +14,25 @@ limitations under the License. */
...
@@ -14,45 +14,25 @@ limitations under the License. */
#include <string>
#include <string>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
framework
{
class
Scope
;
}
// namespace framework
}
// namespace paddle
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/platform/
collective
_helper.h"
#include "paddle/fluid/platform/
nccl
_helper.h"
#endif
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
class
CSyncCommStreamOp
:
public
framework
::
Operator
Base
{
class
CSyncCommStreamOp
:
public
framework
::
Operator
WithKernel
{
public:
public:
CSyncCommStreamOp
(
const
std
::
string
&
type
,
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
)
{}
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
PADDLE_ENFORCE_EQ
(
is_gpu_place
(
place
),
true
,
platform
::
errors
::
PreconditionNotMet
(
"Sync stream op can run on gpu place only for now."
));
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{}
int
ring_id
=
Attr
<
int
>
(
"ring_id"
);
auto
stream
=
protected:
platform
::
NCCLCommContext
::
Instance
().
Get
(
ring_id
,
place
)
->
stream
();
framework
::
OpKernelType
GetExpectedKernelType
(
#ifdef PADDLE_WITH_RCCL
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE_CUDA_SUCCESS
(
hipStreamSynchronize
(
stream
));
return
framework
::
OpKernelType
(
framework
::
proto
::
VarType
::
FP32
,
#else
ctx
.
GetPlace
());
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamSynchronize
(
stream
));
#endif
#else
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"PaddlePaddle should compile with GPU."
));
#endif
}
}
};
};
...
@@ -72,10 +52,38 @@ Call communication stream synchronization.
...
@@ -72,10 +52,38 @@ Call communication stream synchronization.
}
}
};
};
template
<
typename
T
>
class
CSyncCommStreamCudaKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
auto
place
=
ctx
.
GetPlace
();
int
ring_id
=
ctx
.
Attr
<
int
>
(
"ring_id"
);
auto
stream
=
platform
::
NCCLCommContext
::
Instance
().
Get
(
ring_id
,
place
)
->
stream
();
#ifdef PADDLE_WITH_RCCL
PADDLE_ENFORCE_CUDA_SUCCESS
(
hipStreamSynchronize
(
stream
));
#else
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamSynchronize
(
stream
));
#endif
#else
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"PaddlePaddle should compile with GPU."
));
#endif
}
};
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP
ERATOR
(
c_sync_comm_stream
,
ops
::
CSyncCommStreamOp
,
REGISTER_OP
_WITHOUT_GRADIENT
(
c_sync_comm_stream
,
ops
::
CSyncCommStreamOp
,
ops
::
CSyncCommStreamOpMaker
);
ops
::
CSyncCommStreamOpMaker
);
REGISTER_OP_CUDA_KERNEL
(
c_sync_comm_stream
,
ops
::
CSyncCommStreamCudaKernel
<
float
>
);
paddle/fluid/pybind/imperative.cc
浏览文件 @
07741593
...
@@ -1578,7 +1578,10 @@ void BindImperative(py::module *m_ptr) {
...
@@ -1578,7 +1578,10 @@ void BindImperative(py::module *m_ptr) {
m
,
"NCCLParallelContext"
)
m
,
"NCCLParallelContext"
)
.
def
(
py
::
init
<
const
imperative
::
ParallelStrategy
&
,
.
def
(
py
::
init
<
const
imperative
::
ParallelStrategy
&
,
const
platform
::
CUDAPlace
&>
())
const
platform
::
CUDAPlace
&>
())
.
def
(
"init"
,
[](
imperative
::
NCCLParallelContext
&
self
)
{
self
.
Init
();
});
.
def
(
"init"
,
[](
imperative
::
NCCLParallelContext
&
self
)
{
self
.
Init
();
})
.
def
(
"init_with_ring_id"
,
&
imperative
::
NCCLParallelContext
::
InitWithRingID
,
py
::
arg
(
"ring_id"
));
#endif
#endif
#if defined(PADDLE_WITH_XPU_BKCL)
#if defined(PADDLE_WITH_XPU_BKCL)
...
@@ -1587,7 +1590,10 @@ void BindImperative(py::module *m_ptr) {
...
@@ -1587,7 +1590,10 @@ void BindImperative(py::module *m_ptr) {
m
,
"BKCLParallelContext"
)
m
,
"BKCLParallelContext"
)
.
def
(
py
::
init
<
const
imperative
::
ParallelStrategy
&
,
.
def
(
py
::
init
<
const
imperative
::
ParallelStrategy
&
,
const
platform
::
XPUPlace
&>
())
const
platform
::
XPUPlace
&>
())
.
def
(
"init"
,
[](
imperative
::
BKCLParallelContext
&
self
)
{
self
.
Init
();
});
.
def
(
"init"
,
[](
imperative
::
BKCLParallelContext
&
self
)
{
self
.
Init
();
})
.
def
(
"init_with_ring_id"
,
&
imperative
::
BKCLParallelContext
::
InitWithRingID
,
py
::
arg
(
"ring_id"
));
#endif
#endif
}
}
...
...
paddle/fluid/pybind/op_function_generator.cc
浏览文件 @
07741593
...
@@ -119,6 +119,8 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = {
...
@@ -119,6 +119,8 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = {
{
"fill_constant"
,
{
"Out"
}},
{
"fill_constant"
,
{
"Out"
}},
{
"matmul"
,
{
"Out"
}},
{
"matmul"
,
{
"Out"
}},
{
"c_broadcast"
,
{
"Out"
}},
{
"c_broadcast"
,
{
"Out"
}},
{
"c_sync_calc_stream"
,
{
"Out"
}},
{
"c_sync_comm_stream"
,
{
"Out"
}},
{
"c_allreduce_sum"
,
{
"Out"
}},
{
"c_allreduce_sum"
,
{
"Out"
}},
{
"c_allreduce_max"
,
{
"Out"
}},
{
"c_allreduce_max"
,
{
"Out"
}},
{
"c_allreduce_min"
,
{
"Out"
}},
{
"c_allreduce_min"
,
{
"Out"
}},
...
...
python/paddle/distributed/collective.py
浏览文件 @
07741593
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
07741593
...
@@ -82,6 +82,7 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
...
@@ -82,6 +82,7 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_scatter_api
)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_scatter_api
)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_barrier_api
)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_barrier_api
)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_allreduce_api
)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_allreduce_api
)
LIST
(
REMOVE_ITEM TEST_OPS test_new_group_api
)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_broadcast_api
)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_broadcast_api
)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_allgather_api
)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_allgather_api
)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_wait
)
LIST
(
REMOVE_ITEM TEST_OPS test_collective_wait
)
...
@@ -177,6 +178,7 @@ endif()
...
@@ -177,6 +178,7 @@ endif()
if
((
NOT WITH_NCCL
)
AND
(
NOT WITH_RCCL
))
if
((
NOT WITH_NCCL
)
AND
(
NOT WITH_RCCL
))
list
(
REMOVE_ITEM TEST_OPS test_imperative_group
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_group
)
LIST
(
REMOVE_ITEM TEST_OPS test_new_group_api
)
endif
()
endif
()
if
(((
NOT WITH_ROCM
)
AND
(
NOT WITH_GPU
))
OR WIN32
)
if
(((
NOT WITH_ROCM
)
AND
(
NOT WITH_GPU
))
OR WIN32
)
...
@@ -518,6 +520,7 @@ if(WITH_DISTRIBUTE)
...
@@ -518,6 +520,7 @@ if(WITH_DISTRIBUTE)
if
(
WITH_GPU OR WITH_ROCM
)
if
(
WITH_GPU OR WITH_ROCM
)
bash_test_modules
(
test_c_comm_init_op START_BASH test_c_comm_init_op.sh ENVS PADDLE_BINARY_DIR=
${
PADDLE_BINARY_DIR
}
)
bash_test_modules
(
test_c_comm_init_op START_BASH test_c_comm_init_op.sh ENVS PADDLE_BINARY_DIR=
${
PADDLE_BINARY_DIR
}
)
py_test_modules
(
test_launch_coverage MODULES test_launch_coverage
)
py_test_modules
(
test_launch_coverage MODULES test_launch_coverage
)
bash_test_modules
(
test_new_group START_BASH test_new_group.sh ENVS PADDLE_BINARY_DIR=
${
PADDLE_BINARY_DIR
}
)
endif
()
endif
()
bash_test_modules
(
test_fleetrun START_BASH test_fleetrun.sh ENVS PADDLE_BINARY_DIR=
${
PADDLE_BINARY_DIR
}
)
bash_test_modules
(
test_fleetrun START_BASH test_fleetrun.sh ENVS PADDLE_BINARY_DIR=
${
PADDLE_BINARY_DIR
}
)
...
@@ -831,6 +834,7 @@ if((WITH_ROCM OR WITH_GPU) AND NOT WIN32)
...
@@ -831,6 +834,7 @@ if((WITH_ROCM OR WITH_GPU) AND NOT WIN32)
set_tests_properties
(
test_collective_allgather_api PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_collective_allgather_api PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_collective_broadcast_api PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_collective_broadcast_api PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_collective_allreduce_api PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_collective_allreduce_api PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_new_group_api PROPERTIES TIMEOUT 120
)
if
(
WITH_DISTRIBUTE
)
if
(
WITH_DISTRIBUTE
)
set_tests_properties
(
test_pipeline PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_pipeline PROPERTIES TIMEOUT 120
)
endif
()
endif
()
...
@@ -853,6 +857,7 @@ if((WITH_ROCM OR WITH_GPU) AND NOT WIN32)
...
@@ -853,6 +857,7 @@ if((WITH_ROCM OR WITH_GPU) AND NOT WIN32)
test_collective_barrier_api
test_collective_barrier_api
test_collective_reduce_api
test_collective_reduce_api
test_collective_allreduce_api
test_collective_allreduce_api
test_new_group_api
test_collective_broadcast_api
test_collective_broadcast_api
test_collective_allgather_api
test_collective_allgather_api
PROPERTIES LABELS
"RUN_TYPE=DIST"
)
PROPERTIES LABELS
"RUN_TYPE=DIST"
)
...
...
python/paddle/fluid/tests/unittests/collective_allreduce_new_group_api.py
0 → 100644
浏览文件 @
07741593
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
numpy
as
np
import
argparse
import
os
import
sys
import
signal
import
time
import
socket
from
contextlib
import
closing
from
six
import
string_types
import
math
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.profiler
as
profiler
import
paddle.fluid.unique_name
as
nameGen
from
paddle.fluid
import
core
import
unittest
from
multiprocessing
import
Process
import
paddle.fluid.layers
as
layers
from
functools
import
reduce
from
test_collective_api_base
import
TestCollectiveAPIRunnerBase
,
runtime_main
paddle
.
enable_static
()
class
TestCollectiveAllreduceNewGroupAPI
(
TestCollectiveAPIRunnerBase
):
def
__init__
(
self
):
self
.
global_ring_id
=
0
def
get_model
(
self
,
main_prog
,
startup_program
,
rank
):
with
fluid
.
program_guard
(
main_prog
,
startup_program
):
tindata
=
layers
.
data
(
name
=
"tindata"
,
shape
=
[
10
,
1000
],
dtype
=
'float32'
)
gp
=
paddle
.
distributed
.
new_group
([
0
,
1
])
paddle
.
distributed
.
all_reduce
(
tindata
,
group
=
gp
,
use_calc_stream
=
False
)
return
[
tindata
]
if
__name__
==
"__main__"
:
runtime_main
(
TestCollectiveAllreduceNewGroupAPI
,
"allreduce"
)
python/paddle/fluid/tests/unittests/new_group.py
0 → 100644
浏览文件 @
07741593
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
import
os
import
paddle
class
TestNewGroupAPI
(
object
):
def
__init__
(
self
):
paddle
.
distributed
.
init_parallel_env
()
d1
=
np
.
array
([
1
,
2
,
3
])
d2
=
np
.
array
([
2
,
3
,
4
])
self
.
tensor1
=
paddle
.
to_tensor
(
d1
)
self
.
tensor2
=
paddle
.
to_tensor
(
d2
)
def
test_all
(
self
):
gp
=
paddle
.
distributed
.
new_group
([
0
,
1
])
print
(
"test new group api ok"
)
tmp
=
np
.
array
([
0
,
0
,
0
])
result
=
paddle
.
to_tensor
(
tmp
)
paddle
.
distributed
.
scatter
(
result
,
[
self
.
tensor2
,
self
.
tensor1
],
src
=
0
,
group
=
gp
,
use_calc_stream
=
True
)
if
gp
.
rank
==
0
:
assert
np
.
array_equal
(
result
,
self
.
tensor2
)
elif
gp
.
rank
==
1
:
assert
np
.
array_equal
(
result
,
self
.
tensor1
)
print
(
"test scatter api ok"
)
paddle
.
distributed
.
broadcast
(
result
,
src
=
1
,
group
=
gp
,
use_calc_stream
=
True
)
assert
np
.
array_equal
(
result
,
self
.
tensor1
)
print
(
"test broadcast api ok"
)
paddle
.
distributed
.
reduce
(
result
,
dst
=
0
,
group
=
gp
,
use_calc_stream
=
True
)
if
gp
.
rank
==
0
:
assert
np
.
array_equal
(
result
,
paddle
.
add
(
self
.
tensor1
,
self
.
tensor1
))
elif
gp
.
rank
==
1
:
assert
np
.
array_equal
(
result
,
self
.
tensor1
)
print
(
"test reduce api ok"
)
paddle
.
distributed
.
all_reduce
(
result
,
use_calc_stream
=
True
)
assert
np
.
array_equal
(
result
,
paddle
.
add
(
paddle
.
add
(
self
.
tensor1
,
self
.
tensor1
),
self
.
tensor1
))
print
(
"test all_reduce api ok"
)
paddle
.
distributed
.
wait
(
result
,
gp
,
use_calc_stream
=
True
)
paddle
.
distributed
.
wait
(
result
,
gp
,
use_calc_stream
=
False
)
print
(
"test wait api ok"
)
result
=
[]
paddle
.
distributed
.
all_gather
(
result
,
self
.
tensor1
,
group
=
gp
,
use_calc_stream
=
True
)
assert
np
.
array_equal
(
result
[
0
],
self
.
tensor1
)
assert
np
.
array_equal
(
result
[
1
],
self
.
tensor1
)
print
(
"test all_gather api ok"
)
paddle
.
distributed
.
barrier
(
group
=
gp
)
print
(
"test barrier api ok"
)
return
if
__name__
==
"__main__"
:
gpt
=
TestNewGroupAPI
()
gpt
.
test_all
()
python/paddle/fluid/tests/unittests/test_new_group.sh
0 → 100755
浏览文件 @
07741593
#!/bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set
-e
CUDA_VISIBLE_DEVICES
=
0,1 python
-m
paddle.distributed.launch
--gpus
=
0,1 new_group.py
python/paddle/fluid/tests/unittests/test_new_group_api.py
0 → 100644
浏览文件 @
07741593
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
paddle
from
test_collective_api_base
import
TestDistBase
paddle
.
enable_static
()
class
TestCollectiveAllreduceAPI
(
TestDistBase
):
def
_setup_config
(
self
):
pass
def
test_allreduce_nccl
(
self
):
self
.
check_with_place
(
"collective_allreduce_new_group_api.py"
,
"allreduce"
,
"nccl"
)
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录