Commit 0d878f1a (unverified)

Delete ElementwiseKernel in BroadcastKernel (#42779)

Authored May 20, 2022 by niuliling123; committed by GitHub on May 20, 2022.
Parent: c5d3bc0e
Showing 8 changed files with 80 additions and 48 deletions (+80, -48):
paddle/phi/kernels/funcs/broadcast_function.h       +6   -16
paddle/phi/kernels/gpu/gelu_grad_kernel.cu          +6    -4
paddle/phi/kernels/gpu/gelu_kernel.cu               +6    -4
paddle/phi/kernels/gpu/reduce_grad.h                +7   -14
paddle/phi/kernels/gpu/reduce_mean_grad_kernel.cu   +17   -2
paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu    +34   -3
paddle/phi/kernels/gpu/where_kernel.cu              +1    -2
paddle/phi/kernels/kps/bitwise_kernel.cu            +3    -3
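Before this commit, `BroadcastKernel` checked at runtime whether all inputs and the first output shared one shape and, if so, silently forwarded to `ElementwiseKernel`. The commit deletes that fallback: `BroadcastKernel` now always dispatches through `BroadcastKernelForDifferentVecSize`, and call sites that know their tensors are same-shaped (gelu, where, bitwise_not) invoke `ElementwiseKernel` themselves. Below is a hedged sketch of the resulting call-site pattern; the entry-point names come from the diff, but the surrounding setup (`x`, `y`, `out`, `dev_ctx`, `T`, `Functor`) is assumed:

```cpp
// Sketch only: how a phi kernel picks an entry point after this commit.
std::vector<const phi::DenseTensor*> ins = {&x, &y};
std::vector<phi::DenseTensor*> outs = {out};
if (x.dims() == y.dims()) {
  // Same shape: the caller opts into the elementwise fast path explicitly.
  phi::funcs::ElementwiseKernel<T, Functor, 1>(dev_ctx, ins, &outs, Functor());
} else {
  // Shapes differ: the broadcast path; axis = -1 lets it derive the axis.
  phi::funcs::BroadcastKernel<phi::ElementwiseType::kBinary, T, T>(
      dev_ctx, ins, &outs, -1, Functor());
}
```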
paddle/phi/kernels/funcs/broadcast_function.h

```diff
@@ -585,26 +585,16 @@ void BroadcastKernel(const KPDevice &ctx,
                      Functor func) {
   std::vector<int> dims_size;
   dims_size.reserve(ins.size());
-  bool no_broadcast_flag = true;
   for (auto *in : ins) {
-    no_broadcast_flag &= ins[0]->dims() == in->dims();
     dims_size.emplace_back(in->dims().size());
   }
-  if (ins.size() > 0 && outs->size() > 0) {
-    no_broadcast_flag &= outs->at(0)->dims() == ins[0]->dims();
-  }
-  if (no_broadcast_flag) {
-    phi::funcs::ElementwiseKernel<OutT, Functor, NumOuts>(
-        ctx, ins, outs, func);
-  } else {
-    axis = axis == -1
-               ? *std::max_element(dims_size.begin(), dims_size.end()) -
-                     *std::min_element(dims_size.begin(), dims_size.end())
-               : axis;
-    BroadcastKernelForDifferentVecSize<ET, InT, OutT, Functor, NumOuts>(
-        ctx, ins, outs, axis, func);
-  }
+  axis = axis == -1
+             ? *std::max_element(dims_size.begin(), dims_size.end()) -
+                   *std::min_element(dims_size.begin(), dims_size.end())
+             : axis;
+  BroadcastKernelForDifferentVecSize<ET, InT, OutT, Functor, NumOuts>(
+      ctx, ins, outs, axis, func);
 }
 
 template <typename Functor, typename T, typename OutType = T>
```
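With the fast path gone, the only logic left ahead of the dispatch is the default-axis rule: a caller-supplied `axis` of `-1` becomes the difference between the largest and smallest input rank. A standalone illustration of that rule in plain C++ (the function name is hypothetical; the expression mirrors the diff):

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Mirrors the axis expression in BroadcastKernel: with axis == -1 the
// default is (max input rank) - (min input rank).
int DefaultAxis(const std::vector<int>& dims_size, int axis) {
  return axis == -1
             ? *std::max_element(dims_size.begin(), dims_size.end()) -
                   *std::min_element(dims_size.begin(), dims_size.end())
             : axis;
}

int main() {
  // Ranks 3 and 1, e.g. broadcasting a [2, 3, 4] tensor with a [4] tensor:
  // the derived axis is 2, aligning the rank-1 input with the last dim.
  assert(DefaultAxis({3, 1}, -1) == 2);
  // An explicit axis passes through unchanged.
  assert(DefaultAxis({3, 1}, 0) == 0);
  return 0;
}
```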
paddle/phi/kernels/gpu/gelu_grad_kernel.cu

```diff
@@ -81,11 +81,13 @@ void GeluGradKernel(const Context& dev_ctx,
       }
     }
 #endif
-    phi::funcs::BroadcastKernel<ElementwiseType::kBinary, T, T>(
-        dev_ctx, ins, &outs, 0, GeluWithApproximateGradFunctor<T>());
+    using Functor = GeluWithApproximateGradFunctor<T>;
+    phi::funcs::ElementwiseKernel<T, Functor, 1>(
+        dev_ctx, ins, &outs, Functor());
   } else {
-    phi::funcs::BroadcastKernel<ElementwiseType::kBinary, T, T>(
-        dev_ctx, ins, &outs, 0, GeluWithoutApproximateGradFunctor<T>());
+    using Functor = GeluWithoutApproximateGradFunctor<T>;
+    phi::funcs::ElementwiseKernel<T, Functor, 1>(
+        dev_ctx, ins, &outs, Functor());
   }
 }
```
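Both branches get the same treatment here, and gelu_kernel.cu below mirrors it exactly: GELU is an elementwise op, so `x` and `out_grad` always share one shape and the broadcast path (with its placeholder axis argument `0`) was pure overhead. The trailing `1` in `ElementwiseKernel<T, Functor, 1>` is the NumOuts parameter visible in the broadcast_function.h hunk above, matching the single tensor in `outs`.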
paddle/phi/kernels/gpu/gelu_kernel.cu

```diff
@@ -71,11 +71,13 @@ void GeluKernel(const Context& dev_ctx,
       }
     }
 #endif
-    phi::funcs::BroadcastKernel<ElementwiseType::kBinary, T, T>(
-        dev_ctx, ins, &outs, 0, GeluWithApproximateFunctor<T>());
+    using Functor = GeluWithApproximateFunctor<T>;
+    phi::funcs::ElementwiseKernel<T, Functor, 1>(
+        dev_ctx, ins, &outs, Functor());
   } else {
-    phi::funcs::BroadcastKernel<ElementwiseType::kBinary, T, T>(
-        dev_ctx, ins, &outs, 0, GeluWithoutApproximateFunctor<T>());
+    using Functor = GeluWithoutApproximateFunctor<T>;
+    phi::funcs::ElementwiseKernel<T, Functor, 1>(
+        dev_ctx, ins, &outs, Functor());
   }
 }
```
paddle/phi/kernels/gpu/reduce_grad.h

```diff
@@ -43,22 +43,19 @@ void ReduceGrad(const GPUContext& dev_ctx,
   }));
 }
 
-template <typename T,
-          typename Context,
-          template <typename, typename> class TransformOp>
+template <typename T, typename OutT, typename Context, typename Functor>
 void ReduceGradKernel(const Context& dev_ctx,
                       const DenseTensor& x,
                       const DenseTensor& out_grad,
                       const std::vector<int64_t>& dims,
                       bool keep_dim,
                       bool reduce_all,
-                      DenseTensor* x_grad) {
+                      DenseTensor* x_grad,
+                      Functor functor) {
   auto* in_x = &x;
   auto* d_out = &out_grad;
   auto* d_x = x_grad;
   auto pt_out_dtype = x.dtype();
   // get reduce_dim and reduce_num for reduce_mean_grad
   int dim_size = in_x->dims().size();
   std::vector<int> reduce_dims =
@@ -79,14 +76,10 @@ void ReduceGradKernel(const Context& dev_ctx,
   auto pt_d_out = new_d_out;
   auto pt_d_x = *d_x;
-  using MPType = typename kps::details::MPTypeTrait<T>::Type;
-
-  phi::ReduceGrad<T, TransformOp<T, MPType>>(
-      dev_ctx,
-      &pt_d_out,
-      &pt_d_x,
-      pt_out_dtype,
-      TransformOp<T, MPType>(reduce_num));
+  std::vector<const DenseTensor*> inputs = {&pt_d_out};
+  std::vector<DenseTensor*> outputs = {&pt_d_x};
+  funcs::BroadcastKernel<phi::ElementwiseType::kUnary, T, OutT>(
+      dev_ctx, inputs, &outputs, 0, functor);
 }
 
 }  // namespace phi
```
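Two contract changes to the `ReduceGradKernel` helper: the template-template parameter `TransformOp`, which the helper used to instantiate itself as `TransformOp<T, MPType>(reduce_num)`, is replaced by a caller-constructed `functor` argument, so callers now own the `reduce_num` bookkeeping; and a new `OutT` parameter types the kUnary `BroadcastKernel` call that expands the reshaped `out_grad` back to `x_grad`'s shape while applying that functor.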
paddle/phi/kernels/gpu/reduce_mean_grad_kernel.cu

```diff
@@ -29,8 +29,23 @@ void ReduceMeanGradKernel(const Context& dev_ctx,
                           bool keep_dim,
                           bool reduce_all,
                           DenseTensor* x_grad) {
-  ReduceGradKernel<T, Context, kps::DivideFunctor>(
-      dev_ctx, x, out_grad, dims, keep_dim, reduce_all, x_grad);
+  int dim_size = x.dims().size();
+  std::vector<int> reduce_dims =
+      funcs::details::GetReduceDim(dims, dim_size, reduce_all);
+  int reduce_num = 1;
+  for (auto i : reduce_dims) {
+    reduce_num *= (x.dims())[i];
+  }
+  using MPType = typename kps::details::MPTypeTrait<T>::Type;
+  ReduceGradKernel<T, T, Context, kps::DivideFunctor<T, MPType>>(
+      dev_ctx,
+      x,
+      out_grad,
+      dims,
+      keep_dim,
+      reduce_all,
+      x_grad,
+      kps::DivideFunctor<T, MPType>(reduce_num));
 }
 
 }  // namespace phi
```
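reduce_mean_grad is the first caller migrated to that new contract: it now computes `reduce_num` (the number of input elements collapsed into each output element) itself and passes in a ready-made `DivideFunctor`, reflecting that the gradient of a mean over `reduce_num` elements is `out_grad / reduce_num`. A standalone sketch of that bookkeeping, with illustrative values that are not from the diff:

```cpp
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> x_dims = {2, 3, 4};    // assumed input shape
  std::vector<int> reduce_dims = {1, 2};  // assumed reduced axes
  // Product of the reduced extents, as the updated kernel computes it.
  int reduce_num = 1;
  for (int i : reduce_dims) reduce_num *= x_dims[i];
  // Mean over 3 * 4 = 12 elements: every x_grad entry gets out_grad / 12.
  std::printf("reduce_num = %d\n", reduce_num);  // prints 12
  return 0;
}
```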
paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu

```diff
@@ -29,8 +29,40 @@ void ReduceSumGradKernel(const Context& dev_ctx,
                          bool keep_dim,
                          bool reduce_all,
                          DenseTensor* x_grad) {
-  ReduceGradKernel<T, Context, kps::IdentityFunctor>(
-      dev_ctx, x, out_grad, dims, keep_dim, reduce_all, x_grad);
+  using MPType = typename kps::details::MPTypeTrait<T>::Type;
+  auto out_dtype = x.dtype();
+  auto* in_x = &x;
+  auto* d_out = &out_grad;
+  auto* d_x = x_grad;
+
+  // get reduce_dim and reduce_num for reduce_mean_grad
+  int dim_size = in_x->dims().size();
+  std::vector<int> reduce_dims =
+      funcs::details::GetReduceDim(dims, dim_size, reduce_all);
+  auto update_dims = vectorize(d_x->dims());
+  int reduce_num = 1;
+  for (auto i : reduce_dims) {
+    reduce_num *= (in_x->dims())[i];
+    update_dims[i] = 1;
+  }
+
+  // make new tensor
+  DenseTensor new_d_out(d_out->dtype());
+  new_d_out.ShareDataWith(*d_out);
+  new_d_out.Resize(phi::make_ddim(update_dims));
+
+  dev_ctx.Alloc(d_x, x.dtype());
+  auto pt_out_dtype = x.dtype();
+  auto pt_d_out = new_d_out;
+  auto pt_d_x = *d_x;
+  std::vector<const DenseTensor*> inputs = {&pt_d_out};
+  std::vector<DenseTensor*> outputs = {&pt_d_x};
+  phi::ReduceGrad<T, kps::IdentityFunctor<T, MPType>>(
+      dev_ctx,
+      &pt_d_out,
+      &pt_d_x,
+      pt_out_dtype,
+      kps::IdentityFunctor<T, MPType>());
 }
 
 }  // namespace phi
@@ -48,4 +80,3 @@ PD_REGISTER_KERNEL(sum_grad,
                    int64_t,
                    phi::dtype::complex<float>,
-                   phi::dtype::complex<double>) {
-}
+                   phi::dtype::complex<double>) {}
```
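reduce_sum_grad goes the other way: rather than adopt the slimmed-down `ReduceGradKernel`, it inlines the shape bookkeeping, reshaping `out_grad` so every reduced axis has extent 1, and calls `phi::ReduceGrad` with an `IdentityFunctor` directly, since a sum's gradient simply replays `out_grad` across the reduced axes. The second hunk only collapses the empty `PD_REGISTER_KERNEL(sum_grad, ...)` body onto one line.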
paddle/phi/kernels/gpu/where_kernel.cu

```diff
@@ -40,8 +40,7 @@ void WhereKernel(const Context& ctx,
   ctx.template Alloc<T>(out);
   CondFunctor<T> func;
-  funcs::BroadcastKernel<ElementwiseType::kTernary, T, T>(
-      ctx, ins, &outs, -1, func);
+  funcs::ElementwiseKernel<T, CondFunctor<T>, 1>(ctx, ins, &outs, func);
 }
 
 }  // namespace phi
```
paddle/phi/kernels/kps/bitwise_kernel.cu

```diff
@@ -51,9 +51,9 @@ void BitwiseNotKernel(const Context& dev_ctx,
   dev_ctx.template Alloc<T>(out);
   std::vector<const DenseTensor*> ins = {&x};
   std::vector<DenseTensor*> outs = {out};
-  funcs::BitwiseNotFunctor<T> func;
-  funcs::BroadcastKernel<ElementwiseType::kUnary, T, T>(
-      dev_ctx, ins, &outs, -1, func);
+  funcs::BitwiseNotFunctor<T> unary_func;
+  funcs::ElementwiseKernel<T, funcs::BitwiseNotFunctor<T>>(
+      dev_ctx, ins, &outs, unary_func);
 }
 
 }  // namespace phi
```