Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
e4b1ec1b
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e4b1ec1b
编写于
9月 28, 2020
作者:
A
ashburnlee
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add cuda support for unique op.
上级
1e10acd3
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
92 addition
and
35 deletion
+92
-35
paddle/fluid/operators/unique_op.cu
paddle/fluid/operators/unique_op.cu
+92
-35
未找到文件。
paddle/fluid/operators/unique_op.cu
浏览文件 @
e4b1ec1b
...
...
@@ -18,10 +18,13 @@ limitations under the License. */
#include <thrust/scatter.h>
#include <thrust/unique.h>
#include <iostream>
#include "paddle/fluid/operators/unique_op.h" // TransComute
#include <vector>
#include "paddle/fluid/framework/tensor_util.h" // TensorToVector()
#include "paddle/fluid/operators/unique_op.h" // TransComute()
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
// Binary function 'less than'
...
...
@@ -89,6 +92,70 @@ struct BinaryNotEqual {
}
};
// index_select() function for Tensor
template
<
typename
InT
>
void
IndexSelect
(
const
framework
::
ExecutionContext
&
context
,
const
Tensor
&
input
,
const
Tensor
&
index
,
Tensor
*
output
,
int
dim
)
{
auto
input_dim
=
input
.
dims
();
auto
input_dim_size
=
input_dim
.
size
();
auto
output_dim
=
output
->
dims
();
auto
slice_size
=
1
;
for
(
auto
i
=
dim
+
1
;
i
<
input_dim_size
;
i
++
)
{
slice_size
*=
input_dim
[
i
];
}
auto
input_width
=
slice_size
*
input_dim
[
dim
];
auto
output_width
=
slice_size
*
output_dim
[
dim
];
auto
outer_nums
=
1
;
for
(
auto
i
=
0
;
i
<
dim
;
i
++
)
{
outer_nums
*=
input_dim
[
i
];
}
auto
index_size
=
index
.
dims
()[
0
];
std
::
vector
<
InT
>
input_vec
;
std
::
vector
<
int32_t
>
index_vec
;
TensorToVector
(
input
,
context
.
device_context
(),
&
input_vec
);
TensorToVector
(
index
,
context
.
device_context
(),
&
index_vec
);
std
::
vector
<
InT
>
out_vec
(
output
->
numel
());
for
(
int
i
=
0
;
i
<
index_size
;
i
++
)
{
PADDLE_ENFORCE_GE
(
index_vec
[
i
],
0
,
platform
::
errors
::
InvalidArgument
(
"Variable value (index) of OP(index_select) "
"expected >= 0 and < %ld, but got %ld. Please check input "
"value."
,
input_dim
[
dim
],
index_vec
[
i
]));
PADDLE_ENFORCE_LT
(
index_vec
[
i
],
input_dim
[
dim
],
platform
::
errors
::
InvalidArgument
(
"Variable value (index) of OP(index_select) "
"expected >= 0 and < %ld, but got %ld. Please check input "
"value."
,
input_dim
[
dim
],
index_vec
[
i
]));
}
for
(
auto
i
=
0
;
i
<
outer_nums
;
i
++
)
{
auto
input_start_offset
=
i
*
input_width
;
auto
output_start_offset
=
i
*
output_width
;
for
(
auto
j
=
0
;
j
<
index_size
;
j
++
)
{
int32_t
index_value
=
index_vec
[
j
];
for
(
auto
k
=
0
;
k
<
slice_size
;
k
++
)
{
out_vec
[
output_start_offset
+
j
*
slice_size
+
k
]
=
input_vec
[
input_start_offset
+
index_value
*
slice_size
+
k
];
}
}
}
output
->
mutable_data
<
InT
>
(
context
.
GetPlace
());
framework
::
TensorFromVector
(
out_vec
,
context
.
device_context
(),
output
);
output
->
Resize
(
output_dim
);
}
/// The core logic of computing Unique
template
<
typename
InT
,
typename
equal_T
,
typename
not_equal_T
>
static
void
ComputeUniqueFlatten
(
const
framework
::
ExecutionContext
&
context
,
...
...
@@ -178,10 +245,11 @@ static void ComputeUniqueFlatten(const framework::ExecutionContext& context,
template
<
typename
InT
,
typename
equal_T
,
typename
not_equal_T
>
static
void
ComputeUniqueDims
(
const
framework
::
ExecutionContext
&
context
,
framework
::
Tensor
*
sorted_indices
,
InT
*
sorted_indices_data
,
framework
::
Tensor
*
out
,
bool
return_index
,
bool
return_inverse
,
bool
return_counts
,
equal_T
equal
,
not_equal_T
not_equal
,
int64_t
row
)
{
int32_t
*
sorted_indices_data
,
framework
::
Tensor
*
out
,
bool
return_index
,
bool
return_inverse
,
bool
return_counts
,
equal_T
equal
,
not_equal_T
not_equal
,
int64_t
row
)
{
// 1. inverse indices: 'inverse'
Tensor
*
inverse
=
context
.
Output
<
Tensor
>
(
"Index"
);
inverse
->
Resize
(
framework
::
make_ddim
({
row
}));
/// in.shape[0]
...
...
@@ -212,18 +280,15 @@ static void ComputeUniqueDims(const framework::ExecutionContext& context,
sorted_indices_data
;
thrust
::
device_ptr
<
int32_t
>
range_data_ptr_dev
(
range_data_ptr
);
range_data_ptr_dev
[
num_out
]
=
row
;
sorted_indices
->
Resize
(
framework
::
make_ddim
({
num_out
}));
// 3. counts: 'counts'
Tensor
*
counts
=
context
.
Output
<
Tensor
>
(
"Counts"
);
counts
->
Resize
(
framework
::
make_ddim
({
row
}));
counts
->
Resize
(
framework
::
make_ddim
({
num_out
}));
auto
count_data
=
counts
->
mutable_data
<
int32_t
>
(
context
.
GetPlace
());
thrust
::
fill
(
thrust
::
device
,
count_data
,
count_data
+
row
,
0
);
thrust
::
adjacent_difference
(
thrust
::
device
,
range_data_ptr
+
1
,
range_data_ptr
+
row
+
1
,
count_data
);
/**
* TODO(ashburnlee) implement index_select() to get 'out' and reshape back
*/
}
// Calculate unique when 'dim' is not set
...
...
@@ -244,7 +309,7 @@ static void UniqueDimsCUDATensor(const framework::ExecutionContext& context,
framework
::
Tensor
*
out
,
bool
return_index
,
bool
return_inverse
,
bool
return_counts
,
int
axis
)
{
// Transpose & reshape
//
1.
Transpose & reshape
// Transpose tensor: eg. axis=1, [dim0, dim1, dim2] -> [dim1, dim0, dim2]
std
::
vector
<
int
>
permute
(
in
.
dims
().
size
());
std
::
iota
(
permute
.
begin
(),
permute
.
end
(),
0
);
...
...
@@ -258,60 +323,53 @@ static void UniqueDimsCUDATensor(const framework::ExecutionContext& context,
in_trans
.
Resize
(
in_trans_dims
);
in_trans
.
mutable_data
<
InT
>
(
context
.
GetPlace
());
auto
&
dev_ctx
=
context
.
cuda_device_context
();
TransCompute
<
DeviceContext
,
InT
>
(
in
.
dims
().
size
(),
//
维度个数
dev_ctx
,
//
设备
in
,
//
原始t
ensor
&
in_trans
,
// Reshape 后的tensor 被修改
permute
);
// axis 的索引
TransCompute
<
DeviceContext
,
InT
>
(
in
.
dims
().
size
(),
//
num of dims
dev_ctx
,
//
device
in
,
//
original T
ensor
&
in_trans
,
// Tensor after reshape
permute
);
// index of axis
// Reshape tensor: eg. [dim1, dim0, dim2] -> [dim1, dim0*dim2]
framework
::
DDim
in_trans_flat_dims
=
framework
::
flatten_to_2d
(
in_trans_dims
,
1
);
in_trans
.
Resize
(
in_trans_flat_dims
);
// in_trans 2D
// in_trans(unsorted) as 'in'
// now 'in_trans is 2D
int64_t
col
=
in_trans
.
dims
()[
1
];
int64_t
row
=
in_trans
.
dims
()[
0
];
const
InT
*
in_trans_data
=
in_trans
.
data
<
InT
>
();
const
InT
*
in_trans_data
=
in_trans
.
data
<
InT
>
();
// read only
// Tensor in_trans_hat;
// framework::TensorCopy(in_trans, context.GetPlace(), &in_trans_hat);
auto
in_trans_data
=
in_trans
.
mutable_data
<
InT
>
(
context
.
GetPlace
());
Tensor
*
sorted_indices
=
context
.
Output
<
Tensor
>
(
"Indices"
);
sorted_indices
->
Resize
(
framework
::
make_ddim
({
row
}));
auto
sorted_indices_data
=
sorted_indices
->
mutable_data
<
int32_t
>
(
context
.
GetPlace
());
// 2. Calculate 'sorted_indices', 'inverse', 'counts'
// Init index and sort
thrust
::
sequence
(
thrust
::
device
,
sorted_indices_data
,
sorted_indices_data
+
row
);
thrust
::
sort
(
thrust
::
device
,
sorted_indices_data
,
sorted_indices_data
+
row
,
LessThan
<
InT
>
(
col
,
in_trans_data
));
ComputeUniqueDims
<
InT
>
(
context
,
sorted_indices
,
sorted_indices_data
,
out
,
return_index
,
return_inverse
,
return_counts
,
BinaryEqual
<
InT
>
(
col
,
in_trans_data
),
BinaryNotEqual
<
InT
>
(
col
,
in_trans_data
),
row
);
/**
* NOTE: If index_select() is implemented and called in ComputeUniqueDims(),
* the code below can be deleted.
*/
// Reshape 'out' back
std
::
vector
<
framework
::
Tensor
>
in_trans_unbind
=
Unbind
(
in_trans_hat
);
math
::
ConcatFunctor
<
DeviceContext
,
InT
>
concat_functor
;
// 3. Select indices and reshape back to get 'out'
framework
::
Tensor
out_trans
;
std
::
vector
<
int64_t
>
out_trans_dims_vec
=
in_trans_dims_vec
;
out_trans_dims_vec
[
0
]
=
in_trans_unbind
.
size
();
out_trans_dims_vec
[
0
]
=
sorted_indices
->
numel
();
out_trans
.
Resize
(
framework
::
make_ddim
(
out_trans_dims_vec
));
out_trans
.
mutable_data
<
InT
>
(
context
.
GetPlace
());
IndexSelect
<
InT
>
(
context
,
in_trans
,
*
sorted_indices
,
&
out_trans
,
0
);
std
::
swap
(
out_trans_dims_vec
[
0
],
out_trans_dims_vec
[
axis
]);
out
->
Resize
(
framework
::
make_ddim
(
out_trans_dims_vec
));
out
->
mutable_data
<
InT
>
(
context
.
GetPlace
());
concat_functor
(
dev_ctx
,
in_trans_unbind
,
0
,
&
out_trans
);
std
::
vector
<
framework
::
Tensor
>
out_trans_unbind
=
Unbind
(
out_trans
);
math
::
ConcatFunctor
<
DeviceContext
,
InT
>
concat_functor
;
concat_functor
(
dev_ctx
,
out_trans_unbind
,
0
,
&
out_trans
);
TransCompute
<
DeviceContext
,
InT
>
(
out_trans
.
dims
().
size
(),
dev_ctx
,
out_trans
,
out
,
permute
);
}
...
...
@@ -353,7 +411,6 @@ class UniqueKernel<platform::CUDADeviceContext, InT>
return_inverse
,
return_counts
);
}
else
{
int
axis
=
axis_vec
[
0
];
// 已指明 DeviceContext 为 CUDADeviceContext, 写法正确
UniqueDimsCUDATensor
<
platform
::
CUDADeviceContext
,
InT
>
(
context
,
*
x
,
out
,
return_index
,
return_inverse
,
return_counts
,
axis
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录