Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
672cdc21
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
672cdc21
编写于
2月 09, 2018
作者:
Y
Yang Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add nccl
上级
1c91574b
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
21 addition
and
15 deletion
+21
-15
CMakeLists.txt
CMakeLists.txt
+0
-1
paddle/framework/executor.cc
paddle/framework/executor.cc
+4
-2
paddle/framework/framework.proto
paddle/framework/framework.proto
+1
-0
paddle/operators/nccl_op.cc
paddle/operators/nccl_op.cc
+8
-8
paddle/platform/CMakeLists.txt
paddle/platform/CMakeLists.txt
+1
-1
paddle/platform/dynload/CMakeLists.txt
paddle/platform/dynload/CMakeLists.txt
+1
-1
paddle/pybind/protobuf.cc
paddle/pybind/protobuf.cc
+2
-1
paddle/scripts/docker/build.sh
paddle/scripts/docker/build.sh
+4
-1
未找到文件。
CMakeLists.txt
浏览文件 @
672cdc21
...
...
@@ -141,7 +141,6 @@ include(external/boost) # download boost
include
(
external/any
)
# download libn::any
include
(
external/eigen
)
# download eigen3
include
(
external/pybind11
)
# download pybind11
include
(
external/nccl
)
include
(
external/cares
)
include
(
external/grpc
)
...
...
paddle/framework/executor.cc
浏览文件 @
672cdc21
...
...
@@ -23,6 +23,7 @@ limitations under the License. */
#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/reader.h"
#include "paddle/operators/nccl/nccl_gpu_common.h" // platform::Communicator
#include "paddle/platform/place.h"
#include "paddle/platform/profiler.h"
...
...
@@ -53,6 +54,8 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) {
var
->
GetMutable
<
LoDTensorArray
>
();
}
else
if
(
var_type
==
proto
::
VarDesc
::
PLACE_LIST
)
{
var
->
GetMutable
<
platform
::
PlaceList
>
();
}
else
if
(
var_type
==
proto
::
VarDesc
::
NCCL_COM
)
{
var
->
GetMutable
<
platform
::
Communicator
>
();
}
else
if
(
var_type
==
proto
::
VarDesc
::
READER
)
{
var
->
GetMutable
<
ReaderHolder
>
();
}
else
{
...
...
@@ -118,13 +121,12 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
auto
op
=
paddle
::
framework
::
OpRegistry
::
CreateOp
(
*
op_desc
);
VLOG
(
4
)
<<
op
->
DebugStringEx
(
local_scope
);
VLOG
(
3
)
<<
op
->
DebugStringEx
(
local_scope
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
op
->
Type
(),
pool
.
Get
(
place_
));
op
->
Run
(
*
local_scope
,
place_
);
VLOG
(
3
)
<<
op
->
DebugStringEx
(
local_scope
);
if
(
FLAGS_benchmark
)
{
VLOG
(
2
)
<<
"Memory used after operator "
+
op
->
Type
()
+
" running: "
<<
memory
::
memory_usage
(
place_
);
...
...
paddle/framework/framework.proto
浏览文件 @
672cdc21
...
...
@@ -129,6 +129,7 @@ message VarDesc {
LOD_TENSOR_ARRAY
=
7
;
PLACE_LIST
=
8
;
READER
=
9
;
NCCL_COM
=
10
;
}
required
string
name
=
1
;
required
VarType
type
=
2
;
...
...
paddle/operators/nccl_op.cc
浏览文件 @
672cdc21
...
...
@@ -31,8 +31,13 @@ class NCCLInitOp : public framework::OperatorBase {
const
auto
&
name
=
Output
(
"Communicator"
);
PADDLE_ENFORCE_NOT_NULL
(
scope
.
FindVar
(
name
),
"Can not find variable '%s' in the scope."
,
name
);
std
::
vector
<
int
>
gpus
=
Attr
<
std
::
vector
<
int
>>
(
"gpus"
);
PADDLE_ENFORCE
(
!
gpus
.
empty
(),
"Attr(gpus) should not be empty."
);
int
count
=
platform
::
GetCUDADeviceCount
();
std
::
vector
<
int
>
gpus
(
count
);
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
gpus
[
i
]
=
i
;
}
PADDLE_ENFORCE
(
!
gpus
.
empty
(),
"NCCL init with 0 gpus."
);
if
(
scope
.
FindVar
(
name
)
==
nullptr
)
{
PADDLE_THROW
(
"Output(Communicator) is needed for ncclInit operator."
);
...
...
@@ -50,11 +55,6 @@ class NCCLInitOpMaker : public framework::OpProtoAndCheckerMaker {
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddOutput
(
"Communicator"
,
"Create Communicator for communicating between gpus"
);
AddAttr
<
std
::
vector
<
int
>>
(
"gpus"
,
"(vector<int>) GPU id lists"
);
AddAttr
<
int
>
(
"dtype"
,
"(int, default 5 (FP32)) "
"Output data type"
)
.
SetDefault
(
framework
::
proto
::
DataType
::
FP32
);
AddComment
(
R"DOC(
NCCLInit Operator.
...
...
@@ -77,7 +77,7 @@ class NCCLAllReduceOp : public framework::OperatorWithKernel {
ctx
->
HasInput
(
"Communicator"
),
" Input(Communicator) of AllReduce op input should not be NULL"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"
Input(X) of AllReduce op in
put should not be NULL"
);
"
Output(Out) of AllReduce op out
put should not be NULL"
);
auto
x_dims
=
ctx
->
GetInputsDim
(
"X"
);
...
...
paddle/platform/CMakeLists.txt
浏览文件 @
672cdc21
if
(
WITH_GPU
)
cc_library
(
enforce SRCS enforce.cc DEPS
nccl
)
cc_library
(
enforce SRCS enforce.cc DEPS
)
else
()
cc_library
(
enforce SRCS enforce.cc
)
endif
()
...
...
paddle/platform/dynload/CMakeLists.txt
浏览文件 @
672cdc21
cc_library
(
dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce
)
nv_library
(
dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc
DEPS dynamic_loader
nccl
)
DEPS dynamic_loader
)
cc_library
(
dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc
)
paddle/pybind/protobuf.cc
浏览文件 @
672cdc21
...
...
@@ -241,7 +241,8 @@ void BindVarDsec(py::module &m) {
.
value
(
"LOD_RANK_TABLE"
,
proto
::
VarDesc
::
LOD_RANK_TABLE
)
.
value
(
"LOD_TENSOR_ARRAY"
,
proto
::
VarDesc
::
LOD_TENSOR_ARRAY
)
.
value
(
"PLACE_LIST"
,
proto
::
VarDesc
::
PLACE_LIST
)
.
value
(
"READER"
,
proto
::
VarDesc
::
READER
);
.
value
(
"READER"
,
proto
::
VarDesc
::
READER
)
.
value
(
"NCCL_COM"
,
proto
::
VarDesc
::
NCCL_COM
);
}
void
BindOpDesc
(
py
::
module
&
m
)
{
...
...
paddle/scripts/docker/build.sh
浏览文件 @
672cdc21
...
...
@@ -34,6 +34,7 @@ function cmake_gen() {
Configuring cmake in /paddle/build ...
-DCMAKE_BUILD_TYPE=
${
CMAKE_BUILD_TYPE
:-
Release
}
${
PYTHON_FLAGS
}
-DWITH_DSO=ON
-DWITH_DOC=OFF
-DWITH_GPU=
${
WITH_GPU
:-
OFF
}
-DWITH_DISTRIBUTE=
${
WITH_DISTRIBUTE
:-
OFF
}
...
...
@@ -57,6 +58,7 @@ EOF
cmake ..
\
-DCMAKE_BUILD_TYPE
=
${
CMAKE_BUILD_TYPE
:-
Release
}
\
${
PYTHON_FLAGS
}
\
-DWITH_DSO
=
ON
\
-DWITH_DOC
=
OFF
\
-DWITH_GPU
=
${
WITH_GPU
:-
OFF
}
\
-DWITH_DISTRIBUTE
=
${
WITH_DISTRIBUTE
:-
OFF
}
\
...
...
@@ -189,6 +191,7 @@ EOF
ldconfig
${
DOCKERFILE_CUDNN_DSO
}
${
DOCKERFILE_GPU_ENV
}
ENV NCCL_LAUNCH_MODE PARALLEL
ADD go/cmd/pserver/pserver /usr/bin/
ADD go/cmd/master/master /usr/bin/
# default command shows the paddle version and exit
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录