Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
8eaec5dd
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8eaec5dd
编写于
4月 09, 2018
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add BCast and Gather
上级
47a4ec06
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
382 addition
and
3 deletion
+382
-3
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+7
-1
paddle/fluid/framework/details/broad_cast_op_handle.cc
paddle/fluid/framework/details/broad_cast_op_handle.cc
+103
-0
paddle/fluid/framework/details/broad_cast_op_handle.h
paddle/fluid/framework/details/broad_cast_op_handle.h
+54
-0
paddle/fluid/framework/details/broad_cast_op_handle_test.cc
paddle/fluid/framework/details/broad_cast_op_handle_test.cc
+174
-0
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+44
-2
未找到文件。
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
8eaec5dd
...
...
@@ -2,8 +2,12 @@ cc_library(var_handle SRCS var_handle.cc DEPS place)
cc_library
(
op_handle_base SRCS op_handle_base.cc DEPS var_handle device_context
)
cc_library
(
scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
)
cc_library
(
fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
)
nv_library
(
nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
if
(
WITH_GPU
)
nv_library
(
nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda
)
nv_library
(
broad_cast_op_handle SRCS broad_cast_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
)
endif
()
cc_library
(
computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry
)
cc_library
(
ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base
)
...
...
@@ -11,6 +15,8 @@ cc_library(ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph)
if
(
WITH_GPU
)
set
(
multi_devices_graph_builder_deps nccl_all_reduce_op_handle
)
nv_test
(
broad_cast_op_test SRCS broad_cast_op_handle_test.cc DEPS var_handle op_handle_base scope lod_tensor ddim memory
device_context broad_cast_op_handle
)
else
()
set
(
multi_devices_graph_builder_deps
)
endif
()
...
...
paddle/fluid/framework/details/broad_cast_op_handle.cc
0 → 100644
浏览文件 @
8eaec5dd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/broad_cast_op_handle.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
// Returns the dense tensor held by `in_var`: the LoDTensor itself, or the
// value tensor of a SelectedRows. Any other variable type is rejected with
// PADDLE_THROW.
Tensor *GetTensorFromVar(Variable *in_var) {
  Tensor *tensor = nullptr;
  if (in_var->IsType<LoDTensor>()) {
    tensor = in_var->GetMutable<LoDTensor>();
  } else if (in_var->IsType<SelectedRows>()) {
    tensor = in_var->GetMutable<SelectedRows>()->mutable_value();
  } else {
    PADDLE_THROW("Var should be LoDTensor or SelectedRows");
  }
  return tensor;
}
// Binds the handle to the given scopes, places and context map (all kept by
// reference) and records, for every place, the device context that the
// context map reports for it.
BCastOpHandle::BCastOpHandle(const std::vector<Scope *> &local_scopes,
                             const std::vector<platform::Place> &places,
                             const platform::ContextMap &ctxs)
    : local_scopes_(local_scopes), places_(places), ctxs_(ctxs) {
  for (auto place_it = places_.begin(); place_it != places_.end();
       ++place_it) {
    this->dev_ctxes_[*place_it] = ctxs_.DevCtx(*place_it);
  }
}
// Copies the single input variable into the output variable of every place.
//
// Expects exactly one input and one output per entry of places_. For each
// output the destination is resized/allocated to match the input (including
// LoD, or height/rows for SelectedRows) and then filled with TensorCopy.
// An output that is the very same object as the input is left untouched.
//
// Throws (via PADDLE_THROW / PADDLE_ENFORCE_EQ) when the input or an output
// place is not in places_, when a variable cannot be found in its scope,
// when input and output types differ, or when the variable is neither a
// LoDTensor nor a SelectedRows.
void BCastOpHandle::RunImpl() {
  PADDLE_ENFORCE_EQ(this->inputs_.size(), 1);
  PADDLE_ENFORCE_EQ(this->outputs_.size(), places_.size());

  auto in_var_handle = static_cast<VarHandle *>(this->inputs_[0]);
  auto &in_place = in_var_handle->place_;

  // Validate the input place first, so an unknown place is rejected before
  // it is used to index dev_ctxes_ below.
  auto in_iter = std::find(places_.begin(), places_.end(), in_place);
  if (in_iter == places_.end()) {
    PADDLE_THROW("The input of BCast is not in the places_.");
  }

  // Wait input done, this Wait is asynchronous operation
  if (inputs_[0]->generated_op_) {
    inputs_[0]->generated_op_->Wait(dev_ctxes_[in_place]);
  }

  int in_offset = in_iter - places_.begin();
  auto in_var = local_scopes_[in_offset]->FindVar(in_var_handle->name_);
  if (in_var == nullptr) {
    PADDLE_THROW("The input variable of BCast is not found in its scope.");
  }
  Tensor *in_tensor = GetTensorFromVar(in_var);

  for (auto *out : outputs_) {
    auto out_handle = static_cast<VarHandle *>(out);
    auto &out_p = out_handle->place_;

    auto out_iter = std::find(places_.begin(), places_.end(), out_p);
    if (out_iter == places_.end()) {
      PADDLE_THROW("The output of BCast is not in the places_.");
    }
    int out_offset = out_iter - places_.begin();

    auto *s = local_scopes_[out_offset];
    auto out_var = s->FindVar(out_handle->name_);
    if (out_var == nullptr) {
      PADDLE_THROW("The output variable of BCast is not found in its scope.");
    }

    PADDLE_ENFORCE_EQ(out_var->Type(), in_var->Type(),
                      "The output of BCast must have the type of its input.");

    if (in_var->IsType<framework::SelectedRows>()) {
      auto in_sr = in_var->GetMutable<framework::SelectedRows>();
      auto out_sr = out_var->GetMutable<framework::SelectedRows>();
      // Input and output are the same object: nothing to copy.
      if (in_sr == out_sr) continue;
      out_sr->set_height(in_sr->height());
      out_sr->set_rows(in_sr->rows());
      out_sr->mutable_value()->Resize(in_sr->value().dims());
      out_sr->mutable_value()->mutable_data(out_p, in_sr->value().type());
    } else if (in_var->IsType<framework::LoDTensor>()) {
      auto in_lod = in_var->GetMutable<framework::LoDTensor>();
      auto out_lod = out_var->GetMutable<framework::LoDTensor>();
      // Input and output are the same object: nothing to copy.
      if (in_lod == out_lod) continue;
      out_lod->set_lod(in_lod->lod());
      out_lod->Resize(in_lod->dims());
      out_lod->mutable_data(out_p, in_lod->type());
    } else {
      PADDLE_THROW("Var should be LoDTensor or SelectedRows");
    }

    // The copy is issued on the device context of the input place.
    Tensor *out_tensor = GetTensorFromVar(out_var);
    paddle::framework::TensorCopy(*in_tensor, out_p, *(dev_ctxes_[in_place]),
                                  out_tensor);
  }
}
// Human-readable identifier of this op handle.
std::string BCastOpHandle::Name() const {
  return "broadcast";
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/broad_cast_op_handle.h
0 → 100644
浏览文件 @
8eaec5dd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
/*
 * Op handle that broadcasts its input to all scopes.
 *
 * The scopes, places and context map are stored as references, so the
 * objects passed to the constructor must outlive the handle.
 */
struct BCastOpHandle : public OpHandleBase {
  const std::vector<Scope *> &local_scopes_;
  const std::vector<platform::Place> &places_;
  const platform::ContextMap &ctxs_;

  BCastOpHandle(const std::vector<Scope *> &local_scopes,
                const std::vector<platform::Place> &places,
                const platform::ContextMap &ctxs);

  std::string Name() const override;

  // Always reports false for this handle.
  bool IsMultiDeviceTransfer() override { return false; }

 protected:
  void RunImpl() override;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/broad_cast_op_handle_test.cc
0 → 100644
浏览文件 @
8eaec5dd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/broad_cast_op_handle.h"
#include "gtest/gtest.h"
#include "paddle/fluid/platform/device_context.h"
namespace
f
=
paddle
::
framework
;
namespace
p
=
paddle
::
platform
;
// Shape of the tensor used as test payload (20 x 20 elements).
const f::DDim kDims = {20, 20};
class
BroadCastTester
:
public
::
testing
::
Test
{
public:
void
SetUp
()
override
{
int
count
=
p
::
GetCUDADeviceCount
();
if
(
count
<=
1
)
{
LOG
(
WARNING
)
<<
"Cannot test multi-gpu BroadCast, because the CUDA "
"device count is "
<<
count
;
exit
(
0
);
}
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
gpu_list_
.
emplace_back
(
p
::
CUDAPlace
(
i
));
}
ctxs_
=
new
p
::
ContextMap
(
gpu_list_
);
}
template
<
class
T
>
void
BroadCastInitOp
(
int
gpu_id
=
0
)
{
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
local_scope_
.
push_back
(
&
g_scope_
.
NewScope
());
auto
*
out_var
=
local_scope_
[
j
]
->
Var
(
"out"
);
out_var
->
GetMutable
<
T
>
();
}
auto
*
in_var
=
local_scope_
[
gpu_id
]
->
Var
(
"input"
);
in_var
->
GetMutable
<
T
>
();
bc_op_handle_
=
new
f
::
details
::
BCastOpHandle
(
local_scope_
,
gpu_list_
,
*
ctxs_
);
f
::
details
::
VarHandle
*
in_var_handle
=
new
f
::
details
::
VarHandle
();
in_var_handle
->
place_
=
gpu_list_
[
gpu_id
];
in_var_handle
->
name_
=
"input"
;
in_var_handle
->
version_
=
1
;
in_var_handle
->
generated_op_
=
nullptr
;
bc_op_handle_
->
AddInput
(
in_var_handle
);
for
(
size_t
j
=
0
;
j
<
gpu_list_
.
size
();
++
j
)
{
f
::
details
::
VarHandle
*
out_var_handle
=
new
f
::
details
::
VarHandle
();
out_var_handle
->
place_
=
gpu_list_
[
j
];
out_var_handle
->
name_
=
"out"
;
out_var_handle
->
version_
=
2
;
out_var_handle
->
generated_op_
=
bc_op_handle_
;
bc_op_handle_
->
AddOutput
(
out_var_handle
);
}
}
void
BroadCastDestroy
()
{
delete
ctxs_
;
for
(
auto
in
:
bc_op_handle_
->
inputs_
)
{
delete
in
;
}
for
(
auto
out
:
bc_op_handle_
->
outputs_
)
{
delete
out
;
}
delete
bc_op_handle_
;
}
public:
f
::
Scope
g_scope_
;
p
::
ContextMap
*
ctxs_
;
std
::
vector
<
f
::
Scope
*>
local_scope_
;
std
::
vector
<
p
::
Place
>
gpu_list_
;
f
::
details
::
BCastOpHandle
*
bc_op_handle_
;
};
// Broadcasts a LoDTensor living on device 0 and checks that every device's
// "out" variable carries the same LoD and the same element values.
TEST_F(BroadCastTester, BroadCastTestLodTensor) {
  int gpu_id = 0;
  BroadCastInitOp<f::LoDTensor>(gpu_id);

  auto in_var = local_scope_[gpu_id]->Var("input");
  auto in_lod_tensor = in_var->GetMutable<f::LoDTensor>();
  in_lod_tensor->mutable_data<float>(kDims, gpu_list_[gpu_id]);

  // Payload: element k holds the value k.
  std::vector<float> send_vector(f::product(kDims));
  for (size_t k = 0; k < send_vector.size(); ++k) {
    send_vector[k] = k;
  }
  f::LoD lod{{0, 10, 20}};
  paddle::framework::TensorFromVector<float>(
      send_vector, *(ctxs_->DevCtx(gpu_list_[gpu_id])), in_lod_tensor);
  in_lod_tensor->set_lod(lod);

  bc_op_handle_->Run(false);
  ctxs_->WaitAll();

  p::CPUPlace cpu_place;
  for (size_t j = 0; j < gpu_list_.size(); ++j) {
    auto out_var = local_scope_[j]->Var("out");
    const auto& out_tensor = out_var->Get<f::LoDTensor>();
    PADDLE_ENFORCE_EQ(out_tensor.lod(), lod, "lod is not equal.");

    // Look the context up by place rather than by loop index, and wait on
    // it so the host buffer is complete before it is read.
    auto* dev_ctx = ctxs_->DevCtx(gpu_list_[j]);
    f::Tensor result_tensor;
    f::TensorCopy(out_tensor, cpu_place, *dev_ctx, &result_tensor);
    dev_ctx->Wait();

    float* ct = result_tensor.data<float>();
    for (int64_t k = 0; k < f::product(kDims); ++k) {
      ASSERT_NEAR(ct[k], send_vector[k], 1e-5);
    }
  }

  BroadCastDestroy();
}
// Broadcasts a SelectedRows living on device 0 and checks that every
// device's "out" variable carries the same height, rows and values.
TEST_F(BroadCastTester, BroadCastTestSelectedRows) {
  int gpu_id = 0;
  BroadCastInitOp<f::SelectedRows>(gpu_id);

  auto in_var = local_scope_[gpu_id]->Var("input");
  auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
  auto value = in_selected_rows->mutable_value();
  value->mutable_data<float>(kDims, gpu_list_[gpu_id]);

  int height = kDims[0] * 2;
  std::vector<int64_t> rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1,
                            2, 4, 6, 3, 1, 1, 1,  1, 3, 7};
  in_selected_rows->set_height(height);
  in_selected_rows->set_rows(rows);

  // Payload: element k holds the value k.
  std::vector<float> send_vector(f::product(kDims));
  for (size_t k = 0; k < send_vector.size(); ++k) {
    send_vector[k] = k;
  }
  paddle::framework::TensorFromVector<float>(
      send_vector, *(ctxs_->DevCtx(gpu_list_[gpu_id])), value);

  bc_op_handle_->Run(false);
  ctxs_->WaitAll();

  p::CPUPlace cpu_place;
  for (size_t j = 0; j < gpu_list_.size(); ++j) {
    auto out_var = local_scope_[j]->Var("out");
    auto& out_select_rows = out_var->Get<f::SelectedRows>();
    const auto& rt = out_select_rows.value();

    PADDLE_ENFORCE_EQ(out_select_rows.height(), height,
                      "height is not equal.");
    // Compare sizes first so the element-wise check below cannot index
    // past the end of the expected rows.
    PADDLE_ENFORCE_EQ(out_select_rows.rows().size(), rows.size(),
                      "rows size is not equal.");
    for (size_t k = 0; k < out_select_rows.rows().size(); ++k) {
      PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k]);
    }

    // Look the context up by place rather than by loop index, and wait on
    // it so the host buffer is complete before it is read.
    auto* dev_ctx = ctxs_->DevCtx(gpu_list_[j]);
    f::Tensor result_tensor;
    f::TensorCopy(rt, cpu_place, *dev_ctx, &result_tensor);
    dev_ctx->Wait();

    float* ct = result_tensor.data<float>();
    for (int64_t k = 0; k < f::product(kDims); ++k) {
      ASSERT_NEAR(ct[k], send_vector[k], 1e-5);
    }
  }

  BroadCastDestroy();
}
paddle/fluid/platform/device_context.h
浏览文件 @
8eaec5dd
...
...
@@ -2,17 +2,20 @@
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/cublas.h"
...
...
@@ -137,6 +140,45 @@ template <>
struct
DefaultDeviceContextType
<
platform
::
CUDAPinnedPlace
>
{
using
TYPE
=
CUDAPinnedDeviceContext
;
};
class
ContextMap
{
public:
explicit
ContextMap
(
const
std
::
vector
<
platform
::
Place
>&
places
)
{
order_
.
reserve
(
places
.
size
());
for
(
auto
&
p
:
places
)
{
auto
dev
=
boost
::
get
<
CUDAPlace
>
(
p
);
int
dev_id
=
dev
.
device
;
order_
.
emplace_back
(
dev_id
);
contexts_
[
dev_id
].
reset
(
new
CUDADeviceContext
(
dev
));
}
PADDLE_ENFORCE_EQ
(
order_
.
size
(),
contexts_
.
size
(),
"Context Map does not support contain two or more same device"
);
}
DeviceContext
*
DevCtx
(
int
dev_id
)
const
{
return
at
(
dev_id
);
}
DeviceContext
*
DevCtx
(
platform
::
Place
p
)
const
{
return
DevCtx
(
boost
::
get
<
CUDAPlace
>
(
p
).
device
);
}
DeviceContext
*
at
(
platform
::
Place
p
)
const
{
return
this
->
at
(
boost
::
get
<
CUDAPlace
>
(
p
).
device
);
}
DeviceContext
*
at
(
int
dev_id
)
const
{
return
contexts_
.
at
(
dev_id
).
get
();
}
void
WaitAll
()
{
for
(
auto
&
p
:
contexts_
)
{
p
.
second
->
Wait
();
}
}
private:
std
::
unordered_map
<
int
,
std
::
unique_ptr
<
DeviceContext
>>
contexts_
;
std
::
vector
<
int
>
order_
;
};
#endif
#ifdef PADDLE_WITH_MKLDNN
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录