Commit 02842cfc
Authored Apr 13, 2018 by chengduoZH

enhance broadcast_op_handle and gather_op_handle

Parent: b0267ac9
Showing 6 changed files with 266 additions and 239 deletions (+266 −239)
paddle/fluid/framework/details/broadcast_op_handle.cc  +50 −21
paddle/fluid/framework/details/broadcast_op_handle_test.cc  +78 −73
paddle/fluid/framework/details/gather_op_handle.cc  +68 −63
paddle/fluid/framework/details/gather_op_handle_test.cc  +70 −59
paddle/fluid/framework/details/op_handle_base.cc  +0 −15
paddle/fluid/framework/details/op_handle_base.h  +0 −8
paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -18,45 +18,74 @@ namespace paddle {
 namespace framework {
 namespace details {
 
+Tensor *GetTensorFromVar(Variable *in_var) {
+  if (in_var->IsType<LoDTensor>()) {
+    return in_var->GetMutable<LoDTensor>();
+  } else if (in_var->IsType<SelectedRows>()) {
+    return in_var->GetMutable<SelectedRows>()->mutable_value();
+  } else {
+    PADDLE_THROW("Var should be LoDTensor or SelectedRows");
+  }
+  return nullptr;
+}
+
 BroadcastOpHandle::BroadcastOpHandle(const std::vector<Scope *> &local_scopes,
                                      const std::vector<platform::Place> &places)
     : local_scopes_(local_scopes), places_(places) {}
 
 void BroadcastOpHandle::RunImpl() {
-  PADDLE_ENFORCE_EQ(this->inputs_.size(), 1,
+  // the input may have dummy var.
+  std::vector<VarHandle *> in_var_handle;
+  for (auto *in : inputs_) {
+    auto *out_handle = dynamic_cast<VarHandle *>(in);
+    if (out_handle) {
+      in_var_handle.push_back(out_handle);
+    }
+  }
+  PADDLE_ENFORCE_EQ(in_var_handle.size(), 1,
                     "The number of input should be one.");
-  PADDLE_ENFORCE_EQ(this->outputs_.size(), places_.size(),
+
+  // the output may have dummy var.
+  std::vector<VarHandle *> out_var_handles;
+  for (auto *out : outputs_) {
+    auto *out_handle = dynamic_cast<VarHandle *>(out);
+    if (out_handle) {
+      out_var_handles.push_back(out_handle);
+    }
+  }
+  PADDLE_ENFORCE_EQ(out_var_handles.size(), places_.size(),
                     "The number of output should equal to the number of places.");
 
   // Wait input done, this Wait is asynchronous operation
-  auto in_var_handle = static_cast<VarHandle *>(this->inputs_[0]);
-  auto &in_place = in_var_handle->place_;
-  if (inputs_[0]->generated_op_) {
-    inputs_[0]->generated_op_->Wait(dev_ctxes_[in_place]);
-    for (auto *out : outputs_) {
-      auto out_handle = static_cast<VarHandle *>(out);
-      auto &out_p = out_handle->place_;
-      inputs_[0]->generated_op_->Wait(dev_ctxes_[out_p]);
+  auto &in_place = in_var_handle[0]->place_;
+  if (in_var_handle[0]->generated_op_) {
+    in_var_handle[0]->generated_op_->Wait(dev_ctxes_[in_place]);
+    for (auto *out : out_var_handles) {
+      auto &out_p = out->place_;
+      if (platform::is_same_place(in_place, out_p)) continue;
+      in_var_handle[0]->generated_op_->Wait(dev_ctxes_[out_p]);
     }
   }
 
-  auto in_scope_idx = in_var_handle->scope_idx_;
+  auto in_scope_idx = in_var_handle[0]->scope_idx_;
   PADDLE_ENFORCE_LT(in_scope_idx, local_scopes_.size(),
                     "The input(%s) is not in the local_scopes.",
-                    in_var_handle->name_);
-  auto in_var = local_scopes_[in_scope_idx]->FindVar(in_var_handle->name_);
+                    in_var_handle[0]->name_);
+  auto in_var = local_scopes_[in_scope_idx]->FindVar(in_var_handle[0]->name_);
   Tensor *in_tensor = GetTensorFromVar(in_var);
 
-  for (auto *out : outputs_) {
-    auto out_handle = static_cast<VarHandle *>(out);
-    auto &out_p = out_handle->place_;
-    auto out_scope_idx = out_handle->scope_idx_;
+  for (auto *out : out_var_handles) {
+    auto &out_p = out->place_;
+    auto out_scope_idx = out->scope_idx_;
     PADDLE_ENFORCE_LT(out_scope_idx, local_scopes_.size(),
-                      "%s is not in the local_scopes ", out_handle->name_);
+                      "%s is not in the local_scopes ", out->name_);
 
     auto *s = local_scopes_[out_scope_idx];
-    auto out_var = s->FindVar(out_handle->name_);
+    auto out_var = s->FindVar(out->name_);
     PADDLE_ENFORCE_EQ(out_p.which(), in_place.which(),
                       "The place of input and output should be the same.");
@@ -89,7 +118,7 @@ void BroadcastOpHandle::RunImpl() {
       auto dst_gpu_place = boost::get<platform::CUDAPlace>(out_p);
       void *dst_ptr = out_tensor->mutable_data(out_p);
       void *src_ptr = in_tensor->data<void>();
-      int64_t size = in_tensor->numel();
+      int64_t size = in_tensor->numel() * SizeOfType(in_tensor->type());
       memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
                    reinterpret_cast<platform::CUDADeviceContext *>(dev_ctxes_[out_p])
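The recurring pattern in this change: `inputs_`/`outputs_` may now contain `DummyVarHandle` entries that exist only to encode scheduling dependencies, so `RunImpl` first filters them down to real `VarHandle`s with `dynamic_cast` and enforces the counts on the filtered lists. A minimal standalone sketch of that filtering, using stand-in types rather than the actual Paddle classes:

```cpp
// Sketch only: VarHandleBase/VarHandle/DummyVarHandle are hypothetical
// stand-ins that mirror the shape of the Paddle handle hierarchy.
#include <cassert>
#include <vector>

struct VarHandleBase {
  virtual ~VarHandleBase() = default;  // polymorphic base for dynamic_cast
};
struct VarHandle : VarHandleBase {};       // carries real data
struct DummyVarHandle : VarHandleBase {};  // dependency-only placeholder

int main() {
  VarHandle real;
  DummyVarHandle dummy;
  std::vector<VarHandleBase *> inputs{&real, &dummy};

  // Keep only real VarHandles; dummies carry nothing to broadcast.
  std::vector<VarHandle *> in_var_handles;
  for (auto *in : inputs) {
    if (auto *vh = dynamic_cast<VarHandle *>(in)) {
      in_var_handles.push_back(vh);
    }
  }
  assert(in_var_handles.size() == 1);  // the count check ignores dummies
  return 0;
}
```

The second hunk also fixes the device copy length: `numel()` counts elements, not bytes, so the size passed to `memory::Copy` becomes `numel() * SizeOfType(in_tensor->type())`.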
paddle/fluid/framework/details/broadcast_op_handle_test.cc
@@ -27,8 +27,20 @@ namespace p = paddle::platform;
 // test data amount
 const f::DDim kDims = {20, 20};
 
-class BroadcastTester : public ::testing::Test {
- public:
+struct TestBroadcastOpHandle {
+  std::vector<std::unique_ptr<p::DeviceContext>> ctxs_;
+  std::vector<Scope *> local_scopes_;
+  Scope g_scope_;
+  std::unique_ptr<OpHandleBase> op_handle_;
+  std::vector<std::unique_ptr<VarHandleBase>> vars_;
+  std::vector<p::Place> gpu_list_;
+
+  void WaitAll() {
+    for (size_t j = 0; j < ctxs_.size(); ++j) {
+      ctxs_[j]->Wait();
+    }
+  }
+
   void InitCtxOnGpu(bool use_gpu) {
     if (use_gpu) {
 #ifdef PADDLE_WITH_CUDA
@@ -57,61 +69,56 @@ class BroadcastTester : public ::testing::Test {
     }
   }
 
-  void BroadcastInitOp(int input_scope_idx) {
+  void InitBroadcastOp(size_t input_scope_idx) {
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      local_scope_.push_back(&g_scope_.NewScope());
-      local_scope_[j]->Var("out");
+      local_scopes_.push_back(&(g_scope_.NewScope()));
+      local_scopes_[j]->Var("out");
     }
-    local_scope_[input_scope_idx]->Var("input");
+    local_scopes_[input_scope_idx]->Var("input");
 
-    bc_op_handle_ = new f::details::BroadcastOpHandle(local_scope_, gpu_list_);
+    op_handle_.reset(new BroadcastOpHandle(local_scopes_, gpu_list_));
 
-    f::details::VarHandle *in_var_handle = new f::details::VarHandle();
+    vars_.emplace_back(new VarHandle());
+    VarHandle *in_var_handle = static_cast<VarHandle *>(vars_.back().get());
     in_var_handle->place_ = gpu_list_[input_scope_idx];
     in_var_handle->name_ = "input";
     in_var_handle->version_ = 1;
     in_var_handle->scope_idx_ = input_scope_idx;
     in_var_handle->generated_op_ = nullptr;
-    bc_op_handle_->AddInput(in_var_handle);
+    op_handle_->AddInput(in_var_handle);
+
+    // add dummy var
+    vars_.emplace_back(new DummyVarHandle());
+    DummyVarHandle *dummy_var_handle =
+        static_cast<DummyVarHandle *>(vars_.back().get());
+    dummy_var_handle->generated_op_ = nullptr;
+    op_handle_->AddInput(dummy_var_handle);
 
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      bc_op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j];
-      f::details::VarHandle *out_var_handle = new f::details::VarHandle();
+      op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
+      vars_.emplace_back(new VarHandle());
+      VarHandle *out_var_handle = static_cast<VarHandle *>(vars_.back().get());
       out_var_handle->place_ = gpu_list_[j];
       out_var_handle->name_ = "out";
       out_var_handle->version_ = 2;
       out_var_handle->scope_idx_ = j;
-      bc_op_handle_->AddOutput(out_var_handle);
+      op_handle_->AddOutput(out_var_handle);
     }
-  }
 
-  void BroadcastOpDestroy() {
-    for (auto in : bc_op_handle_->inputs_) {
-      delete in;
-    }
-    for (auto out : bc_op_handle_->outputs_) {
-      delete out;
-    }
-    delete bc_op_handle_;
-    for (size_t j = 0; j < ctxs_.size(); ++j) {
-      delete ctxs_[j];
-    }
-  }
-
-  void WaitAll() {
-    for (size_t j = 0; j < ctxs_.size(); ++j) {
-      ctxs_[j]->Wait();
-    }
+    // add dummy var
+    vars_.emplace_back(new DummyVarHandle());
+    DummyVarHandle *out_dummy_var_handle =
+        static_cast<DummyVarHandle *>(vars_.back().get());
+    out_dummy_var_handle->generated_op_ = nullptr;
+    op_handle_->AddOutput(out_dummy_var_handle);
   }
 
-  void TestBroadcastLodTensor() {
-    int input_scope_idx = 0;
-    BroadcastInitOp(input_scope_idx);
-
-    auto in_var = local_scope_[input_scope_idx]->Var("input");
+  void TestBroadcastLodTensor(size_t input_scope_idx) {
+    auto in_var = local_scopes_[input_scope_idx]->Var("input");
     auto in_lod_tensor = in_var->GetMutable<f::LoDTensor>();
     in_lod_tensor->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
 
-    std::vector<float> send_vector(f::product(kDims), input_scope_idx + 12);
+    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
+    for (size_t k = 0; k < send_vector.size(); ++k) {
+      send_vector[k] = k;
+    }
@@ -120,13 +127,13 @@ class BroadcastTester : public ::testing::Test {
         send_vector, *(ctxs_[input_scope_idx]), in_lod_tensor);
     in_lod_tensor->set_lod(lod);
 
-    bc_op_handle_->Run(false);
+    op_handle_->Run(false);
 
     WaitAll();
 
     p::CPUPlace cpu_place;
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      auto out_var = local_scope_[j]->Var("out");
+      auto out_var = local_scopes_[j]->Var("out");
       auto out_tensor = out_var->Get<f::LoDTensor>();
       PADDLE_ENFORCE_EQ(out_tensor.lod(), lod, "lod is not equal.");
@@ -134,42 +141,37 @@ class BroadcastTester : public ::testing::Test {
       f::TensorCopy(out_tensor, cpu_place, *(ctxs_[j]), &result_tensor);
       float *ct = result_tensor.mutable_data<float>(cpu_place);
 
-      for (int64_t j = 0; j < f::product(kDims); ++j) {
-        ASSERT_NEAR(ct[j], send_vector[j], 1e-5);
+      for (int64_t i = 0; i < f::product(kDims); ++i) {
+        ASSERT_NEAR(ct[i], send_vector[i], 1e-5);
       }
     }
-
-    BroadcastOpDestroy();
   }
 
-  void TestBroadcastSelectedRows() {
-    int input_scope_idx = 0;
-    BroadcastInitOp(input_scope_idx);
-
-    auto in_var = local_scope_[input_scope_idx]->Var("input");
+  void TestBroadcastSelectedRows(size_t input_scope_idx) {
+    auto in_var = local_scopes_[input_scope_idx]->Var("input");
     auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
     auto value = in_selected_rows->mutable_value();
     value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
-    int height = kDims[0] * 2;
+    int height = static_cast<int>(kDims[0]) * 2;
     std::vector<int64_t> rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1,
                               2, 4, 6, 3, 1, 1, 1, 1, 3, 7};
     in_selected_rows->set_height(height);
     in_selected_rows->set_rows(rows);
 
-    std::vector<float> send_vector(f::product(kDims));
+    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
     for (size_t k = 0; k < send_vector.size(); ++k) {
       send_vector[k] = k;
     }
     paddle::framework::TensorFromVector<float>(
         send_vector, *(ctxs_[input_scope_idx]), value);
 
-    bc_op_handle_->Run(false);
+    op_handle_->Run(false);
 
     WaitAll();
 
     p::CPUPlace cpu_place;
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      auto out_var = local_scope_[j]->Var("out");
+      auto out_var = local_scopes_[j]->Var("out");
       auto &out_select_rows = out_var->Get<f::SelectedRows>();
       auto rt = out_select_rows.value();
@@ -183,41 +185,44 @@ class BroadcastTester : public ::testing::Test {
       f::TensorCopy(rt, cpu_place, *(ctxs_[j]), &result_tensor);
       float *ct = result_tensor.data<float>();
 
-      for (int64_t j = 0; j < f::product(kDims); ++j) {
-        ASSERT_NEAR(ct[j], send_vector[j], 1e-5);
+      for (int64_t i = 0; i < f::product(kDims); ++i) {
+        ASSERT_NEAR(ct[i], send_vector[i], 1e-5);
       }
     }
-
-    BroadcastOpDestroy();
   }
-
- public:
-  f::Scope g_scope_;
-  std::vector<p::DeviceContext *> ctxs_;
-  std::vector<f::Scope *> local_scope_;
-  std::vector<p::Place> gpu_list_;
-  f::details::BroadcastOpHandle *bc_op_handle_;
 };
 
-TEST_F(BroadcastTester, TestCPUBroadcastTestLodTensor) {
-  InitCtxOnGpu(false);
-  TestBroadcastLodTensor();
+TEST(BroadcastTester, TestCPUBroadcastTestLodTensor) {
+  TestBroadcastOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(false);
+  test_op.InitBroadcastOp(input_scope_idx);
+  test_op.TestBroadcastLodTensor(input_scope_idx);
 }
 
-TEST_F(BroadcastTester, TestCPUBroadcastTestSelectedRows) {
-  InitCtxOnGpu(false);
-  TestBroadcastSelectedRows();
+TEST(BroadcastTester, TestCPUBroadcastTestSelectedRows) {
+  TestBroadcastOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(false);
+  test_op.InitBroadcastOp(input_scope_idx);
+  test_op.TestBroadcastSelectedRows(input_scope_idx);
 }
 
 #ifdef PADDLE_WITH_CUDA
-TEST_F(BroadcastTester, TestGPUBroadcastTestLodTensor) {
-  InitCtxOnGpu(true);
-  TestBroadcastLodTensor();
+TEST(BroadcastTester, TestGPUBroadcastTestLodTensor) {
+  TestBroadcastOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(true);
+  test_op.InitBroadcastOp(input_scope_idx);
+  test_op.TestBroadcastLodTensor(input_scope_idx);
 }
 
-TEST_F(BroadcastTester, TestGPUBroadcastTestSelectedRows) {
-  InitCtxOnGpu(true);
-  TestBroadcastSelectedRows();
+TEST(BroadcastTester, TestGPUBroadcastTestSelectedRows) {
+  TestBroadcastOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(true);
+  test_op.InitBroadcastOp(input_scope_idx);
+  test_op.TestBroadcastSelectedRows(input_scope_idx);
 }
 #endif
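The test refactor swaps a gtest fixture that manually `new`ed and `delete`d its members (via `BroadcastOpDestroy`) for a plain struct whose `unique_ptr` members release everything when the `TEST` body ends, which is why `TEST_F` becomes `TEST`. A small sketch of the same ownership pattern, under hypothetical names:

```cpp
// Sketch only: TestHarness is a hypothetical stand-in for the
// TestBroadcastOpHandle / TestGatherOpHandle structs in this commit.
#include <memory>
#include <vector>
#include "gtest/gtest.h"

struct TestHarness {
  // unique_ptr ownership replaces the old manual Destroy() method.
  std::vector<std::unique_ptr<int>> owned_;
  void Init() { owned_.emplace_back(new int(42)); }
};

TEST(HarnessDemo, OwnsItsState) {
  TestHarness h;  // built and torn down per test; no fixture class needed
  h.Init();
  EXPECT_EQ(*h.owned_.back(), 42);
}
```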
paddle/fluid/framework/details/gather_op_handle.cc
@@ -23,32 +23,54 @@ GatherOpHandle::GatherOpHandle(const std::vector<Scope *> &local_scopes,
     : local_scopes_(local_scopes), places_(places) {}
 
 void GatherOpHandle::RunImpl() {
-  PADDLE_ENFORCE_EQ(this->inputs_.size(), places_.size(),
-                    "The number of inputs should be equal to the number of place.");
-  PADDLE_ENFORCE_EQ(this->outputs_.size(), 1,
-                    "The number of output should be one.");
+  // the input may have dummy var.
+  std::vector<VarHandle *> in_var_handles;
+  for (auto *in : inputs_) {
+    auto *in_handle = dynamic_cast<VarHandle *>(in);
+    if (in_handle) {
+      in_var_handles.push_back(in_handle);
+    }
+  }
+  PADDLE_ENFORCE_EQ(
+      in_var_handles.size(), places_.size(),
+      "The number of output should equal to the number of places.");
+
+  // the output may have dummy var.
+  std::vector<VarHandle *> out_var_handles;
+  for (auto *out : outputs_) {
+    auto *out_handle = dynamic_cast<VarHandle *>(out);
+    if (out_handle) {
+      out_var_handles.push_back(out_handle);
+    }
+  }
+  PADDLE_ENFORCE_EQ(out_var_handles.size(), 1,
+                    "The number of output should be one.");
 
-  auto in_0_handle = static_cast<VarHandle *>(inputs_[0]);
+  auto in_0_handle = static_cast<VarHandle *>(in_var_handles[0]);
   auto pre_in_var =
       local_scopes_[in_0_handle->scope_idx_]->FindVar(in_0_handle->name_);
-  auto pre_place = in_0_handle->place_;
+
+  PADDLE_ENFORCE(pre_in_var->IsType<framework::SelectedRows>(),
+                 "Currently, gather_op only can gather SelectedRows.");
+
+  auto pre_place = in_0_handle->place_;
+  PADDLE_ENFORCE_EQ(out_var_handles[0]->place_.which(), pre_place.which(),
+                    "The place of input and output should be the same.");
 
   // Wait input done, this Wait is asynchronous operation
-  for (auto *in : inputs_) {
-    if (inputs_[0]->generated_op_) {
-      auto &p = static_cast<VarHandle *>(in)->place_;
-      in->generated_op_->Wait(dev_ctxes_[p]);
+  for (auto *in : in_var_handles) {
+    if (in->generated_op_) {
+      in->generated_op_->Wait(dev_ctxes_[in->place_]);
     }
   }
 
   std::vector<int64_t> out_rows;
-  std::vector<Tensor *> in_tensors;
+  std::vector<Tensor> in_tensors;
   std::vector<platform::Place> in_places;
 
+  auto &pre_in = pre_in_var->Get<framework::SelectedRows>();
   // gather the inputs
-  for (auto *in : inputs_) {
+  for (auto *in : in_var_handles) {
     auto in_handle = static_cast<VarHandle *>(in);
     auto in_p = in_handle->place_;
     in_places.push_back(in_p);
@@ -58,63 +80,46 @@ void GatherOpHandle::RunImpl() {
                       "The place of input should be the same.");
     auto *s = local_scopes_[in_handle->scope_idx_];
     auto in_var = s->FindVar(in_handle->name_);
-    PADDLE_ENFORCE_EQ(in_var->Type(), pre_in_var->Type(),
-                      "The type of input is not consistent.");
-
-    if (in_var->IsType<framework::SelectedRows>()) {
-      auto &pre_in = pre_in_var->Get<framework::SelectedRows>();
-      auto &in_sr = in_var->Get<framework::SelectedRows>();
-      auto in_sr_rows = in_sr.rows();
-      out_rows.insert(out_rows.begin(), in_sr_rows.begin(), in_sr_rows.end());
-      PADDLE_ENFORCE_EQ(pre_in.height(), in_sr.height(),
-                        "The height of inputs is not consistent.");
-      PADDLE_ENFORCE_EQ(pre_in.GetCompleteDims(), in_sr.GetCompleteDims(),
-                        "The dims of inputs is not consistent.");
-    } else if (in_var->IsType<framework::LoDTensor>()) {
-      auto &pre_in = pre_in_var->Get<framework::LoDTensor>();
-      auto &in_lodtensor = in_var->Get<framework::LoDTensor>();
-      PADDLE_ENFORCE_EQ(in_lodtensor.lod(), pre_in.lod(),
-                        "The lod of inputs is not consistent.");
-      PADDLE_ENFORCE_EQ(in_lodtensor.dims(), pre_in.dims(),
-                        "The dims of inputs is not consistent.");
-    } else {
-      PADDLE_THROW("Var should be LoDTensor or SelectedRows.");
-    }
-    in_tensors.push_back(GetTensorFromVar(in_var));
-    pre_in_var = in_var;
-  }
+    auto &in_sr = in_var->Get<framework::SelectedRows>();
 
-  // write the output
-  auto out_handle = static_cast<VarHandle *>(this->outputs_[0]);
-  auto &out_place = out_handle->place_;
-  auto out_scope_idx = out_handle->scope_idx_;
-  auto out_var = local_scopes_[out_scope_idx]->FindVar(out_handle->name_);
+    PADDLE_ENFORCE_EQ(in_sr.value().type(), pre_in.value().type(),
+                      "The type of input is not consistent.");
+    PADDLE_ENFORCE_EQ(pre_in.height(), in_sr.height(),
+                      "The height of inputs is not consistent.");
+    PADDLE_ENFORCE_EQ(pre_in.GetCompleteDims(), in_sr.GetCompleteDims(),
+                      "The dims of inputs is not consistent.");
 
-  PADDLE_ENFORCE_EQ(out_place.which(), pre_place.which(),
-                    "The place of input and output should be the same.");
+    auto in_sr_rows = in_sr.rows();
+    out_rows.insert(out_rows.end(), in_sr_rows.begin(), in_sr_rows.end());
 
-  if (pre_in_var->IsType<framework::SelectedRows>()) {
-    auto &pre_in = pre_in_var->Get<framework::SelectedRows>();
-    auto out = out_var->GetMutable<framework::SelectedRows>();
-    out->set_height(pre_in.height());
-    out->set_rows(out_rows);
-    size_t rows = out_rows.size();
-    DDim out_dim = pre_in.GetCompleteDims();
-    out_dim[0] = static_cast<int64_t>(rows);
-    out->mutable_value()->Resize(out_dim);
-    out->mutable_value()->mutable_data(out_place, pre_in.value().type());
-    auto out_tensor = out->mutable_value();
-    // copy
-    int s = 0, e = 0;
-    for (size_t j = 0; j < in_tensors.size(); ++j) {
-      e += in_tensors[j]->dims()[0];
-      auto sub_out = out_tensor->Slice(s, e);
-      paddle::framework::TensorCopy(*(in_tensors[j]), out_place,
-                                    *(dev_ctxes_[in_places[j]]), &sub_out);
-      s = e;
-    }
-  } else if (pre_in_var->IsType<framework::LoDTensor>()) {
-    PADDLE_THROW("Currently, Var only can be SelectedRows.");
-  } else {
-    PADDLE_THROW("Var should be SelectedRows.");
+    in_tensors.emplace_back(in_sr.value());
   }
+
+  // write the output
+  auto &out_place = out_var_handles[0]->place_;
+  auto out_scope_idx = out_var_handles[0]->scope_idx_;
+  auto out_var =
+      local_scopes_[out_scope_idx]->FindVar(out_var_handles[0]->name_);
+
+  auto out = out_var->GetMutable<framework::SelectedRows>();
+  out->set_height(pre_in.height());
+  out->set_rows(out_rows);
+  size_t rows = out_rows.size();
+  DDim out_dim = pre_in.GetCompleteDims();
+  out_dim[0] = static_cast<int64_t>(rows);
+  out->mutable_value()->Resize(out_dim);
+  out->mutable_value()->mutable_data(out_place, pre_in.value().type());
+  Tensor *out_tensor = out->mutable_value();
+  // copy
+  int s = 0, e = 0;
+  for (size_t j = 0; j < in_tensors.size(); ++j) {
+    e += in_tensors[j].dims()[0];
+    auto sub_out = out_tensor->Slice(s, e);
+    paddle::framework::TensorCopy(in_tensors[j], out_place,
+                                  *(dev_ctxes_[in_places[j]]), &sub_out);
+    s = e;
+  }
 }
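The rewritten gather is now `SelectedRows`-only: it concatenates every input's row indices in input order (`out_rows.insert(out_rows.end(), ...)` rather than the old `begin()`), sizes the output to the total row count, then copies each input tensor into the matching `Slice` of the output value. A standalone sketch of that concatenate-and-slice logic, with plain vectors standing in for `SelectedRows`/`Tensor`:

```cpp
// Sketch only: flat std::vector<float> blocks stand in for tensor values.
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  const std::size_t width = 3;  // elements per row
  // Two inputs: (row indices, flattened row values).
  std::vector<std::vector<long>> in_rows = {{0, 2}, {1, 2}};
  std::vector<std::vector<float>> in_vals = {{1, 1, 1, 2, 2, 2},
                                             {3, 3, 3, 4, 4, 4}};

  std::vector<long> out_rows;
  std::vector<float> out_vals;
  for (std::size_t j = 0; j < in_rows.size(); ++j) {
    // Append at the end so input order is preserved, mirroring the fix.
    out_rows.insert(out_rows.end(), in_rows[j].begin(), in_rows[j].end());
    // Each input block lands in the next contiguous slice of the output.
    out_vals.insert(out_vals.end(), in_vals[j].begin(), in_vals[j].end());
  }

  assert(out_rows.size() == 4);
  assert(out_vals.size() == out_rows.size() * width);
  return 0;
}
```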
paddle/fluid/framework/details/gather_op_handle_test.cc
@@ -26,14 +26,26 @@ namespace p = paddle::platform;
 // test data amount
 const f::DDim kDims = {20, 20};
 
-class GatherTester : public ::testing::Test {
- public:
+struct TestGatherOpHandle {
+  std::vector<std::unique_ptr<p::DeviceContext>> ctxs_;
+  std::vector<Scope *> local_scopes_;
+  Scope g_scope_;
+  std::unique_ptr<OpHandleBase> op_handle_;
+  std::vector<std::unique_ptr<VarHandleBase>> vars_;
+  std::vector<p::Place> gpu_list_;
+
+  void WaitAll() {
+    for (size_t j = 0; j < ctxs_.size(); ++j) {
+      ctxs_[j]->Wait();
+    }
+  }
+
   void InitCtxOnGpu(bool use_gpu) {
     if (use_gpu) {
 #ifdef PADDLE_WITH_CUDA
       int count = p::GetCUDADeviceCount();
       if (count <= 1) {
-        LOG(WARNING) << "Cannot test multi-gpu Gather, because the CUDA "
+        LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
                         "device count is " << count;
         exit(0);
@@ -56,57 +68,51 @@ class GatherTester : public ::testing::Test {
     }
   }
 
-  void InitGatherOp(int input_scope_idx) {
+  void InitGatherOp(size_t input_scope_idx) {
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      local_scope_.push_back(&g_scope_.NewScope());
-      local_scope_[j]->Var("input");
+      local_scopes_.push_back(&(g_scope_.NewScope()));
+      local_scopes_[j]->Var("out");
     }
-    local_scope_[input_scope_idx]->Var("out");
+    local_scopes_[input_scope_idx]->Var("input");
 
-    gather_op_handle_ = new f::details::GatherOpHandle(local_scope_, gpu_list_);
-
-    f::details::VarHandle *out_var_handle = new f::details::VarHandle();
-    out_var_handle->place_ = gpu_list_[input_scope_idx];
-    out_var_handle->name_ = "out";
-    out_var_handle->version_ = 2;
-    out_var_handle->scope_idx_ = input_scope_idx;
-    out_var_handle->generated_op_ = gather_op_handle_;
-    gather_op_handle_->AddOutput(out_var_handle);
+    op_handle_.reset(new GatherOpHandle(local_scopes_, gpu_list_));
 
+    // add input
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      gather_op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j];
-      f::details::VarHandle *in_var_handle = new f::details::VarHandle();
+      op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
+      vars_.emplace_back(new VarHandle());
+      VarHandle *in_var_handle = static_cast<VarHandle *>(vars_.back().get());
       in_var_handle->place_ = gpu_list_[j];
       in_var_handle->name_ = "input";
       in_var_handle->version_ = 1;
       in_var_handle->scope_idx_ = j;
       in_var_handle->generated_op_ = nullptr;
-      gather_op_handle_->AddInput(in_var_handle);
+      op_handle_->AddInput(in_var_handle);
     }
-  }
 
-  void GatherOpDestroy() {
-    for (auto in : gather_op_handle_->inputs_) {
-      delete in;
-    }
-    for (auto out : gather_op_handle_->outputs_) {
-      delete out;
-    }
-    delete gather_op_handle_;
-    for (size_t j = 0; j < ctxs_.size(); ++j) {
-      delete ctxs_[j];
-    }
-  }
+    // add dummy var
+    vars_.emplace_back(new DummyVarHandle());
+    DummyVarHandle *in_dummy_var_handle =
+        static_cast<DummyVarHandle *>(vars_.back().get());
+    in_dummy_var_handle->generated_op_ = nullptr;
+    op_handle_->AddInput(in_dummy_var_handle);
 
-  void WaitAll() {
-    for (size_t j = 0; j < ctxs_.size(); ++j) {
-      ctxs_[j]->Wait();
-    }
-  }
+    // add output
+    vars_.emplace_back(new VarHandle());
+    VarHandle *out_var_handle = static_cast<VarHandle *>(vars_.back().get());
+    out_var_handle->place_ = gpu_list_[input_scope_idx];
+    out_var_handle->name_ = "out";
+    out_var_handle->version_ = 2;
+    out_var_handle->scope_idx_ = input_scope_idx;
+    op_handle_->AddOutput(out_var_handle);
 
-  void TestGatherSelectedRows() {
-    int output_scope_idx = 0;
-    InitGatherOp(output_scope_idx);
+    // add dummy var
+    vars_.emplace_back(new DummyVarHandle());
+    DummyVarHandle *dummy_var_handle =
+        static_cast<DummyVarHandle *>(vars_.back().get());
+    op_handle_->AddOutput(dummy_var_handle);
+  }
 
+  void TestGatherSelectedRows(size_t output_scope_idx) {
     int height = kDims[0] * 2;
     std::vector<int64_t> rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1,
                               2, 4, 6, 3, 1, 1, 1, 1, 3, 7};
@@ -117,7 +123,7 @@ class GatherTester : public ::testing::Test {
     for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size();
          ++input_scope_idx) {
-      auto in_var = local_scope_[input_scope_idx]->Var("input");
+      auto in_var = local_scopes_[input_scope_idx]->Var("input");
       auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
       auto value = in_selected_rows->mutable_value();
       value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
@@ -130,13 +136,21 @@ class GatherTester : public ::testing::Test {
       value->Resize(kDims);
     }
 
-    gather_op_handle_->Run(false);
+    auto out_var = local_scopes_[output_scope_idx]->Var("out");
+    auto out_selected_rows = out_var->GetMutable<f::SelectedRows>();
+
+    auto in_var = local_scopes_[output_scope_idx]->Var("input");
+    auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
+
+    out_selected_rows->mutable_value()->ShareDataWith(
+        in_selected_rows->value());
+
+    op_handle_->Run(false);
 
     WaitAll();
 
     p::CPUPlace cpu_place;
-    auto out_var = local_scope_[output_scope_idx]->Var("out");
     auto &out_select_rows = out_var->Get<f::SelectedRows>();
     auto rt = out_select_rows.value();
@@ -152,28 +166,25 @@ class GatherTester : public ::testing::Test {
     for (int64_t j = 0; j < f::product(kDims); ++j) {
       ASSERT_NEAR(ct[j], send_vector[j % send_vector.size()], 1e-5);
     }
-
-    GatherOpDestroy();
   }
-
- public:
-  f::Scope g_scope_;
-  std::vector<p::DeviceContext *> ctxs_;
-  std::vector<f::Scope *> local_scope_;
-  std::vector<p::Place> gpu_list_;
-  f::details::GatherOpHandle *gather_op_handle_;
 };
 
-TEST_F(GatherTester, TestCPUGatherTestSelectedRows) {
-  InitCtxOnGpu(false);
-  TestGatherSelectedRows();
+TEST(GatherTester, TestCPUGatherTestSelectedRows) {
+  TestGatherOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(false);
+  test_op.InitGatherOp(input_scope_idx);
+  test_op.TestGatherSelectedRows(input_scope_idx);
 }
 
 #ifdef PADDLE_WITH_CUDA
-TEST_F(GatherTester, TestGPUGatherTestSelectedRows) {
-  InitCtxOnGpu(true);
-  TestGatherSelectedRows();
+TEST(GatherTester, TestGPUGatherTestSelectedRows) {
+  TestGatherOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(false);
+  test_op.InitGatherOp(input_scope_idx);
+  test_op.TestGatherSelectedRows(input_scope_idx);
 }
 #endif
 }  // namespace details
paddle/fluid/framework/details/op_handle_base.cc
@@ -17,21 +17,6 @@
 namespace paddle {
 namespace framework {
 namespace details {
-// GetTensorFromVar is used in broadcast_op handle and gather_op handle, so it
-// should be placed in a commonplace. I don't find an appropriate place, so I
-// temporarily place it in op_handle_base.
-Tensor *GetTensorFromVar(Variable *in_var) {
-  if (in_var->IsType<LoDTensor>()) {
-    return in_var->GetMutable<LoDTensor>();
-  } else if (in_var->IsType<SelectedRows>()) {
-    return in_var->GetMutable<SelectedRows>()->mutable_value();
-  } else {
-    PADDLE_THROW("Var should be LoDTensor or SelectedRows");
-  }
-  return nullptr;
-}
-
 std::string OpHandleBase::DebugString() const {
   std::stringstream ss;
   ss << "(";
paddle/fluid/framework/details/op_handle_base.h
@@ -17,9 +17,6 @@
 #include <vector>
 
 #include "paddle/fluid/framework/details/var_handle.h"
-#include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/selected_rows.h"
-#include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/macros.h"
@@ -27,11 +24,6 @@ namespace paddle {
 namespace framework {
 namespace details {
 
-// GetTensorFromVar is used in broadcast_op handle and gather_op handle, so it
-// should be placed in a commonplace. I don't find an appropriate place, so I
-// temporarily place it in op_handle.
-Tensor *GetTensorFromVar(Variable *in_var);
-
 constexpr char kLocalExecScopeName[] = "@LCOAL_SCOPE@";
 
 class OpHandleBase {