BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 63fb41b3
Authored Oct 24, 2017 by Dong Zhihong
Parent: 026c61c0

"redefine the initop from kernel to OpBase"
Showing 4 changed files with 70 additions and 24 deletions (+70 -24):

  paddle/framework/operator.h       +1  -1
  paddle/operators/nccl_op.cc       +21 -16
  paddle/operators/nccl_op.cu       +20 -1
  paddle/operators/nccl_op_test.cu  +28 -6
paddle/framework/operator.h
@@ -125,7 +125,7 @@ class OperatorBase {
  protected:
   std::string type_;
   // NOTE: in case of OpGrad, inputs_ contains:
-  // I (Inputs)opear
+  // I (Inputs)
   // O (Outputs)
   // OG (Output Gradients)
   VariableNameMap inputs_;
paddle/operators/nccl_op.cc
@@ -9,26 +9,30 @@
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/operators/nccl_op.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/nccl/nccl_gpu_common.h"

 namespace paddle {
 namespace operators {

 // NCCLinitOp
-class NCCLInitOp : public framework::OperatorWithKernel {
+class NCCLInitOp : public framework::OperatorBase {
  public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
-  void InferShape(framework::InferShapeContext *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasOutput("Communicator"),
-                   " Output(Communicator) of ncclInitOp should not be NULL");
-  }
-
- protected:
-  framework::DataType IndicateDataType(
-      const framework::ExecutionContext &ctx) const override {
-    return static_cast<framework::DataType>(ctx.Attr<int>("data_type"));
+  NCCLInitOp(const std::string &type, const framework::VariableNameMap &inputs,
+             const framework::VariableNameMap &outputs,
+             const framework::AttributeMap &attrs)
+      : OperatorBase(type, inputs, outputs, attrs) {}
+
+  void Run(const framework::Scope &scope,
+           const platform::DeviceContext &dev_ctx) const override {
+    const auto &name = Output("Communicator");
+    PADDLE_ENFORCE_NOT_NULL(scope.FindVar(name),
+                            "Can not find variable '%s' in the scope.", name);
+    std::vector<int> gpus = Attr<std::vector<int>>("gpus");
+    PADDLE_ENFORCE(!gpus.empty(), "Attr(gpus) should not be empty.");
+    platform::Communicator *comm =
+        scope.FindVar(name)->GetMutable<platform::Communicator>();
+    comm->InitAll(gpus);
   }
 };

@@ -188,13 +192,14 @@ class NCCLReduceOpMaker : public framework::OpProtoAndCheckerMaker {
 }  // namespace paddle

 namespace ops = paddle::operators;
+REGISTER_OPERATOR(ncclInit, ops::NCCLInitOp,
+                  paddle::framework::EmptyGradOpMaker, ops::NCCLInitOpMaker);
+
 REGISTER_OP_WITHOUT_GRADIENT(ncclAllReduce, ops::NCCLAllReduceOp,
                              ops::NCCLAllReduceOpMaker);
-REGISTER_OP_WITHOUT_GRADIENT(ncclInit, ops::NCCLInitOp, ops::NCCLInitOpMaker);
 REGISTER_OP_WITHOUT_GRADIENT(ncclBcastSend, ops::NCCLBcastSendOp,
                              ops::NCCLBcastSendOpMaker);
 REGISTER_OP_WITHOUT_GRADIENT(ncclBcastRecv, ops::NCCLBcastRecvOp,
                              ops::NCCLBcastRecvOpMaker);
 REGISTER_OP_WITHOUT_GRADIENT(ncclReduce, ops::NCCLReduceOp,
                              ops::NCCLReduceOpMaker);
-REGISTER_OP_CPU_KERNEL(ncclInit, ops::NCCLInitKernel<float>);
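platform::Communicator and its InitAll method live in nccl_gpu_common.h and are not part of this diff. For context, here is a minimal standalone sketch of what "initialize one NCCL communicator per listed GPU" can look like against the raw NCCL API; the SimpleCommunicator type is hypothetical and this is only an illustration of the idea, not the Paddle implementation.

#include <nccl.h>

#include <vector>

// Hypothetical stand-in for platform::Communicator::InitAll(gpus):
// build one NCCL communicator per device id in `gpus`.
struct SimpleCommunicator {
  std::vector<ncclComm_t> comms;

  void InitAll(const std::vector<int> &gpus) {
    comms.resize(gpus.size());
    // ncclCommInitAll creates a clique of communicators, one per device.
    ncclCommInitAll(comms.data(), static_cast<int>(gpus.size()), gpus.data());
  }

  ~SimpleCommunicator() {
    for (auto &comm : comms) ncclCommDestroy(comm);
  }
};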
paddle/operators/nccl_op.cu
@@ -12,11 +12,30 @@ limitations under the License. */
 #define EIGEN_USE_GPU

 #include <functional>

-#include "paddle/operators/nccl_op.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/nccl/nccl_gpu_common.h"

 namespace paddle {
 namespace operators {

 using framework::Tensor;
 using platform::Communicator;

+template <typename Type>
+class NCCLTypeWrapper;
+
+template <>
+class NCCLTypeWrapper<float> {
+ public:
+  static const ncclDataType_t type = ncclFloat;
+};
+
+template <>
+class NCCLTypeWrapper<double> {
+ public:
+  static const ncclDataType_t type = ncclDouble;
+};
+
 template <typename T>
 class NCCLAllReduceKernel : public framework::OpKernel<T> {
  public:
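The body of NCCLAllReduceKernel sits below this hunk. To show how a trait like NCCLTypeWrapper<T>::type is typically consumed, here is a small self-contained sketch that picks the ncclDataType_t from the element type when issuing an allreduce; it assumes raw device buffers and an already initialized communicator rather than Paddle tensors.

#include <cuda_runtime.h>
#include <nccl.h>

// Same idea as NCCLTypeWrapper in the diff: map a C++ scalar type to the
// matching ncclDataType_t at compile time.
template <typename T>
struct NcclType;
template <>
struct NcclType<float> {
  static const ncclDataType_t value = ncclFloat;
};
template <>
struct NcclType<double> {
  static const ncclDataType_t value = ncclDouble;
};

// Sum-reduce `count` elements of device memory across the ranks of `comm`.
template <typename T>
void AllReduceSum(const T *send_dev, T *recv_dev, size_t count,
                  ncclComm_t comm, cudaStream_t stream) {
  ncclAllReduce(send_dev, recv_dev, count, NcclType<T>::value, ncclSum, comm,
                stream);
}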
paddle/operators/nccl_op_test.cu
@@ -11,7 +11,6 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/operators/nccl_op.h"

 #include <glog/logging.h>
 #include <gtest/gtest.h>

@@ -65,11 +64,11 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs,
 TEST(NCCL, ncclInitOp) {
   f::ProgramDescBind program;
   f::BlockDescBind *block = program.Block(0);
-  f::OpDescBind *op1 = block->AppendOp();
+  f::OpDescBind *op_desc = block->AppendOp();

-  op1->SetType("ncclInit");
-  op1->SetOutput("Communicator", {"x1"});
-  op1->SetAttr("gpus", {gpu_list});
+  op_desc->SetType("ncclInit");
+  op_desc->SetOutput("Communicator", {"x1"});
+  op_desc->SetAttr("gpus", {gpu_list});
   f::Scope g_scope;
   paddle::platform::DeviceContext *ctx =
       new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());

@@ -77,7 +76,30 @@ TEST(NCCL, ncclInitOp) {
   auto *var = g_scope.Var("x1");
   var->GetMutable<paddle::platform::Communicator>();
-  auto op = f::OpRegistry::CreateOp(*op1);
+  auto op = f::OpRegistry::CreateOp(*op_desc);
   VLOG(1) << "invoke NCCLInitOp.";
   op->Run(g_scope, *ctx);
   VLOG(1) << "NCCLInitOp finished.";
 }
+
+// ncclAllReduceOp with desc
+TEST(NCCL, ncclInitOp) {
+  f::ProgramDescBind program;
+  f::BlockDescBind *block = program.Block(0);
+  f::OpDescBind *op_desc = block->AppendOp();
+
+  op_desc->SetType("ncclAllReduce");
+  op_desc->SetOutput("Communicator", {"x1"});
+  op_desc->SetAttr("gpus", {gpu_list});
+
+  f::Scope g_scope;
+  paddle::platform::DeviceContext *ctx =
+      new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
+
+  auto *var = g_scope.Var("x1");
+  var->GetMutable<paddle::platform::Communicator>();
+
+  auto op = f::OpRegistry::CreateOp(*op_desc);
+  VLOG(1) << "invoke NCCLInitOp.";
+  op->Run(g_scope, *ctx);
+  VLOG(1) << "NCCLInitOp finished.";
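gpu_list is declared outside the hunks shown above. As a hedged sketch of how such a device list is commonly collected before these tests run (hypothetical helper, not necessarily this file's actual setup code):

#include <cuda_runtime.h>

#include <vector>

// Hypothetical helper: gather the ids of all visible CUDA devices, the kind
// of value a "gpus" attribute would be set to.
std::vector<int> CollectGpuIds() {
  int count = 0;
  cudaGetDeviceCount(&count);  // return code ignored for this sketch
  std::vector<int> ids;
  for (int i = 0; i < count; ++i) {
    ids.push_back(i);
  }
  return ids;
}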