Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
5756d3e5
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5756d3e5
编写于
5月 28, 2021
作者:
C
chentianyu03
提交者:
GitHub
5月 28, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Modify the reduce_sum OP to use complex template types and rewrite its IdentityFunctor struct (#33164)
上级
481ee79f
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
28 additions
and
28 deletions
+28
-28
paddle/fluid/operators/reduce_ops/cub_reduce.h
paddle/fluid/operators/reduce_ops/cub_reduce.h
+6
-7
paddle/fluid/operators/reduce_ops/reduce_sum_op.cc
paddle/fluid/operators/reduce_ops/reduce_sum_op.cc
+8
-9
paddle/fluid/operators/reduce_ops/reduce_sum_op.cu
paddle/fluid/operators/reduce_ops/reduce_sum_op.cu
+14
-12
未找到文件。
paddle/fluid/operators/reduce_ops/cub_reduce.h
浏览文件 @
5756d3e5
...
...
@@ -366,33 +366,32 @@ void TensorReduce(const framework::Tensor& x, framework::Tensor* y,
#undef CUB_BLOCK_DIM_CASE
}
template
<
typename
Tx
,
typename
ReduceOp
,
typename
TransformOp
>
template
<
typename
Tx
,
typename
ReduceOp
,
template
<
typename
,
typename
>
class
TransformOp
>
struct
TensorReduceFunctor
{
const
framework
::
Tensor
&
x
;
framework
::
Tensor
*
y
;
std
::
vector
<
int
>
origin_reduce_dims
;
const
double
&
init
;
const
ReduceOp
&
reducer
;
const
TransformOp
&
transformer
;
gpuStream_t
stream
;
TensorReduceFunctor
(
const
framework
::
Tensor
&
x
,
framework
::
Tensor
*
y
,
std
::
vector
<
int
>
origin_reduce_dims
,
const
double
&
init
,
const
ReduceOp
&
reducer
,
const
TransformOp
&
transformer
,
gpuStream_t
stream
)
const
ReduceOp
&
reducer
,
gpuStream_t
stream
)
:
x
(
x
),
y
(
y
),
origin_reduce_dims
(
origin_reduce_dims
),
init
(
init
),
reducer
(
reducer
),
transformer
(
transformer
),
stream
(
stream
)
{}
template
<
typename
Ty
>
void
apply
()
const
{
const
Ty
&
init_cast
=
static_cast
<
Ty
>
(
init
);
TensorReduce
<
Tx
,
Ty
,
ReduceOp
,
TransformOp
>
(
x
,
y
,
origin_reduce_dims
,
init_cast
,
reducer
,
transformer
,
stream
);
TensorReduce
<
Tx
,
Ty
,
ReduceOp
,
TransformOp
<
Tx
,
Ty
>>
(
x
,
y
,
origin_reduce_dims
,
init_cast
,
reducer
,
TransformOp
<
Tx
,
Ty
>
(),
stream
);
}
};
...
...
paddle/fluid/operators/reduce_ops/reduce_sum_op.cc
浏览文件 @
5756d3e5
...
...
@@ -119,9 +119,9 @@ REGISTER_OP_CPU_KERNEL(
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
paddle
::
platform
::
complex
64
,
ops
::
SumFunctor
>
,
paddle
::
platform
::
complex
<
float
>
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CPUDeviceContext
,
paddle
::
platform
::
complex
128
,
paddle
::
platform
::
complex
<
double
>
,
ops
::
SumFunctor
>
);
...
...
@@ -130,10 +130,9 @@ using CPUReduceSumGradKernel =
ops
::
ReduceSumGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
T
,
ops
::
SumGradFunctor
,
true
>
;
REGISTER_OP_CPU_KERNEL
(
reduce_sum_grad
,
CPUReduceSumGradKernel
<
bool
>
,
CPUReduceSumGradKernel
<
float
>
,
CPUReduceSumGradKernel
<
double
>
,
CPUReduceSumGradKernel
<
int
>
,
CPUReduceSumGradKernel
<
int64_t
>
,
CPUReduceSumGradKernel
<
paddle
::
platform
::
complex64
>
,
CPUReduceSumGradKernel
<
paddle
::
platform
::
complex128
>
);
REGISTER_OP_CPU_KERNEL
(
reduce_sum_grad
,
CPUReduceSumGradKernel
<
bool
>
,
CPUReduceSumGradKernel
<
float
>
,
CPUReduceSumGradKernel
<
double
>
,
CPUReduceSumGradKernel
<
int
>
,
CPUReduceSumGradKernel
<
int64_t
>
,
CPUReduceSumGradKernel
<
paddle
::
platform
::
complex
<
float
>>
,
CPUReduceSumGradKernel
<
paddle
::
platform
::
complex
<
double
>>
);
paddle/fluid/operators/reduce_ops/reduce_sum_op.cu
浏览文件 @
5756d3e5
...
...
@@ -18,11 +18,13 @@
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
template
<
typename
T
x
,
typename
Ty
=
Tx
>
struct
IdentityFunctor
{
HOSTDEVICE
explicit
inline
IdentityFunctor
()
{}
HOSTDEVICE
inline
T
operator
()(
const
T
&
x
)
const
{
return
x
;
}
HOSTDEVICE
inline
Ty
operator
()(
const
Tx
&
x
)
const
{
return
static_cast
<
Ty
>
(
x
);
}
};
template
<
typename
T
>
...
...
@@ -56,13 +58,13 @@ class ReduceSumKernel : public framework::OpKernel<T> {
if
(
out_dtype
>=
0
)
{
framework
::
VisitDataTypeSmall
(
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
out_dtype
),
TensorReduceFunctor
<
T
,
cub
::
Sum
,
IdentityFunctor
<
T
>
>
(
TensorReduceFunctor
<
T
,
cub
::
Sum
,
IdentityFunctor
>
(
*
input
,
output
,
reduce_dims
,
static_cast
<
double
>
(
0.0
),
cub
::
Sum
(),
IdentityFunctor
<
T
>
(),
stream
));
stream
));
}
else
{
TensorReduce
<
T
,
T
,
cub
::
Sum
,
IdentityFunctor
<
T
>>
(
TensorReduce
<
T
,
T
,
cub
::
Sum
,
IdentityFunctor
<
T
,
T
>>
(
*
input
,
output
,
reduce_dims
,
static_cast
<
T
>
(
0
),
cub
::
Sum
(),
IdentityFunctor
<
T
>
(),
stream
);
IdentityFunctor
<
T
,
T
>
(),
stream
);
}
}
};
...
...
@@ -70,9 +72,9 @@ class ReduceSumKernel : public framework::OpKernel<T> {
}
// namespace operators
}
// namespace paddle
REGISTER_OP_CUDA_KERNEL
(
reduce_sum
,
ops
::
ReduceSumKernel
<
bool
>
,
ops
::
ReduceSumKernel
<
float
>
,
ops
::
ReduceSumKernel
<
double
>
,
ops
::
ReduceSumKernel
<
int
>
,
ops
::
ReduceSumKernel
<
int64_t
>
,
ops
::
ReduceSumKernel
<
paddle
::
platform
::
complex64
>
,
ops
::
ReduceSumKernel
<
paddle
::
platform
::
complex128
>
);
REGISTER_OP_CUDA_KERNEL
(
reduce_sum
,
ops
::
ReduceSumKernel
<
bool
>
,
ops
::
ReduceSumKernel
<
float
>
,
ops
::
ReduceSumKernel
<
double
>
,
ops
::
ReduceSumKernel
<
int
>
,
ops
::
ReduceSumKernel
<
int64_t
>
,
ops
::
ReduceSumKernel
<
paddle
::
platform
::
complex
<
float
>
>
,
ops
::
ReduceSumKernel
<
paddle
::
platform
::
complex
<
double
>
>
);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录