BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)

Commit 7e1155ed (unverified), authored Mar 24, 2022 by niuliling123, committed via GitHub on Mar 24, 2022
Parent: 521cded2

Add is_mean param for mean op (#40757)

Showing 5 changed files with 91 additions and 42 deletions (+91 −42)
paddle/fluid/operators/mean_op.cu (+4 −3)
paddle/fluid/operators/reduce_ops/reduce_op.cu.h (+2 −2)
paddle/phi/kernels/funcs/reduce_function.h (+73 −32)
paddle/phi/kernels/gpu/reduce.h (+10 −3)
paddle/phi/kernels/gpu/reduce_kernel.cu (+2 −2)
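What the change does: MeanRawKernel previously computed a mean by pre-scaling every element with kps::DivideFunctor (summing x_i / n); it now sums the raw values with kps::IdentityFunctor and lets the reduce kernels divide the accumulated result once, controlled by a new is_mean flag threaded from phi::Reduce down to the device kernels. Both orders compute the same value, but the new one performs one division per output instead of one per input element. A minimal host-side sketch of the two strategies (plain C++, not Paddle code):

#include <cstdio>
#include <vector>

// Old strategy: transform each element by 1/n, then sum (n divisions).
float mean_transform_then_sum(const std::vector<float>& x) {
  float acc = 0.0f;
  for (float v : x) acc += v / x.size();  // DivideFunctor-style transform
  return acc;
}

// New strategy: sum raw elements, divide the reduced result once.
float mean_sum_then_divide(const std::vector<float>& x) {
  float acc = 0.0f;
  for (float v : x) acc += v;  // IdentityFunctor-style transform
  return acc / x.size();       // the single is_mean division
}

int main() {
  std::vector<float> x{1.0f, 2.0f, 3.0f, 4.0f};
  std::printf("%f %f\n", mean_transform_then_sum(x), mean_sum_then_divide(x));
}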
paddle/fluid/operators/mean_op.cu
@@ -65,9 +65,10 @@ class MeanCUDAKernel : public framework::OpKernel<T> {
     for (decltype(rank) i = 0; i < rank; ++i) {
       reduce_dims.push_back(i);
     }
-    TensorReduceImpl<T, T, kernel_primitives::AddFunctor, Div>(
-        context.cuda_device_context(), *input, output, Div(numel),
-        reduce_dims, stream);
+    TensorReduceImpl<T, T, kernel_primitives::AddFunctor,
+                     kps::IdentityFunctor<T>>(
+        context.cuda_device_context(), *input, output,
+        kps::IdentityFunctor<T>(), reduce_dims, stream, true);
   }
 };
paddle/fluid/operators/reduce_ops/reduce_op.cu.h
@@ -33,12 +33,12 @@ void TensorReduceImpl(const platform::CUDADeviceContext& dev_ctx,
                       const framework::Tensor& x, framework::Tensor* y,
                       const TransformOp& transform,
                       const std::vector<int>& origin_reduce_dims,
-                      gpuStream_t stream) {
+                      gpuStream_t stream, bool is_mean = false) {
   y->mutable_data<Ty>(x.place());
   phi::funcs::ReduceKernel<Tx, Ty, ReduceOp, TransformOp>(
       static_cast<const phi::GPUContext&>(dev_ctx), x, y, transform,
-      origin_reduce_dims);
+      origin_reduce_dims, is_mean);
 }
 }  // namespace operators
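Note that the new trailing parameter is defaulted, so every existing TensorReduceImpl call site keeps compiling unchanged and only the mean path opts in. A self-contained sketch of that compatibility pattern (simplified stand-in, not the real signature):

#include <cstdio>
#include <numeric>
#include <vector>

// Simplified stand-in for TensorReduceImpl: the defaulted flag keeps old
// call sites source-compatible while new callers can request a mean.
float reduce_sum(const std::vector<float>& x, bool is_mean = false) {
  float s = std::accumulate(x.begin(), x.end(), 0.0f);
  return is_mean ? s / x.size() : s;
}

int main() {
  std::vector<float> x{2.0f, 4.0f};
  std::printf("%f\n", reduce_sum(x));        // pre-existing call: 6.0
  std::printf("%f\n", reduce_sum(x, true));  // new mean path: 3.0
}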
paddle/phi/kernels/funcs/reduce_function.h
@@ -453,25 +453,20 @@ struct ReduceConfig {
   void SetReduceType() {
     int rank = x_dim.size();
     int reduce_rank = reduce_dim.size();
+    bool is_last_dim =
+        (rank == 2) && (reduce_rank == 1) && (reduce_dim[0] == 1);
+    if (rank == reduce_rank || is_last_dim) {
 #ifdef PADDLE_WITH_XPU_KP
       reduce_type = static_cast<int>(ReduceType::kReduceAny);
-      bool not_higher = x_dim[0] > 1;
 #else
       reduce_type = static_cast<int>(ReduceType::kReduceLastDim);
-      int device_id = paddle::platform::GetCurrentDeviceId();
-      int max_grid_z = phi::backends::gpu::GetGpuMaxGridDimSize(device_id)[2];
-      bool not_higher = x_dim[0] >= max_grid_z;
 #endif
-    if (reduce_last_dim && (reduce_rank == 1)) {
-      reduce_type = static_cast<int>(ReduceType::kReduceLastDim);
     } else if (reduce_rank == 1) {
       // ReduceFirstDim and reduceSecondDim
 #ifdef PADDLE_WITH_XPU_KP
       if (reduce_dim[0] == 0) {
         reduce_type = static_cast<int>(ReduceType::kReduceHigherDim);
       } else {
         reduce_type = static_cast<int>(ReduceType::kReduceHigherDim);
-        if (rank == 3 && not_higher) {
-          reduce_type = static_cast<int>(ReduceType::kReduceAny);
-        }
       }
 #else
       reduce_type = static_cast<int>(ReduceType::kReduceHigherDim);
 #endif
     } else {
       reduce_type = static_cast<int>(ReduceType::kReduceAny);
     }
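The hunk above also drops the not_higher heuristic (which consulted the device's maximum grid-z extent via GetGpuMaxGridDimSize) and dispatches purely on the shape of the reduction. A host-side sketch of the resulting non-XPU dispatch; the ReduceType enumerator names are from the hunk itself, while the standalone pick function is illustrative only:

#include <cstdio>
#include <vector>

enum class ReduceType { kReduceLastDim, kReduceHigherDim, kReduceAny };

// Mirrors the non-XPU branch of SetReduceType after this commit:
// reduce over all dims (or the last dim of a rank-2 tensor) picks
// kReduceLastDim, a single reduced dim otherwise picks kReduceHigherDim,
// and anything else falls back to kReduceAny.
ReduceType pick(int rank, const std::vector<int>& reduce_dim) {
  int reduce_rank = static_cast<int>(reduce_dim.size());
  bool is_last_dim = (rank == 2) && (reduce_rank == 1) && (reduce_dim[0] == 1);
  if (rank == reduce_rank || is_last_dim) return ReduceType::kReduceLastDim;
  if (reduce_rank == 1) return ReduceType::kReduceHigherDim;
  return ReduceType::kReduceAny;
}

int main() {
  std::printf("%d\n", static_cast<int>(pick(3, {0})));  // 1 = kReduceHigherDim
}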
@@ -648,7 +643,8 @@ __global__ void ReduceAnyKernel(const Tx* x,
                                 bool reduce_last_dim,
                                 const Calculator reduce_index_calculator,
                                 const Calculator left_index_calculator,
-                                const kps::DimConfig dim) {
+                                const kps::DimConfig dim,
+                                bool is_mean) {
   int input_idx, left_idx, stride;
   int block_size = 0;
   bool need_store = true;
@@ -752,7 +748,9 @@ __global__ void ReduceAnyKernel(const Tx* x,
     kps::Reduce<MPType, 1, 1, 1, ReduceOp, kps::details::kGlobalMode>(
         &reduce_var, &reduce_var, reducer, reduce_last_dim);
+    if (is_mean) {
+      reduce_var = reduce_var / static_cast<MPType>(reduce_num);
+    }
     Ty result = static_cast<Ty>(reduce_var);
     kps::details::WriteData<Ty>(
         y + store_offset + i, &result, static_cast<int>(need_store));
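The division happens after the global reduce, while the accumulator still holds the wider compute type MPType, and only then is the result narrowed to the output type Ty. A host-side mirror of that order of operations (sketch, not kernel code):

#include <cstdio>
#include <vector>

// Accumulate in the wider MPType, divide once if is_mean, then narrow to Ty.
template <typename Ty, typename MPType>
Ty reduce_then_maybe_mean(const std::vector<Ty>& x, bool is_mean) {
  MPType reduce_var = MPType(0);
  for (Ty v : x) reduce_var += static_cast<MPType>(v);
  if (is_mean) {
    reduce_var = reduce_var / static_cast<MPType>(x.size());
  }
  return static_cast<Ty>(reduce_var);
}

int main() {
  std::vector<float> x{1.0f, 2.0f, 3.0f};
  std::printf("%f\n", reduce_then_maybe_mean<float, double>(x, true));  // 2.0
}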
@@ -772,7 +770,9 @@ __global__ void ReduceHigherDimKernel(const Tx* x,
                                       int reduce_num,
                                       int left_num,
                                       int blocking_size,
-                                      const kps::DimConfig dim) {
+                                      const kps::DimConfig dim,
+                                      int mean_div,
+                                      bool is_mean) {
   // when reduce_dim.size() == 1 and reduce_dim[0] != x_dim.size() - 1, this
   // function will be used
   auto block = ReduceIndexMapping<false>(dim);
@@ -806,6 +806,9 @@ __global__ void ReduceHigherDimKernel(const Tx* x,
                   kps::details::ReduceMode::kLocalMode>(
           &reduce_var, &reduce_compute, reducer, false);
     }
+    if (is_mean) {
+      reduce_var = reduce_var / static_cast<MPType>(mean_div);
+    }
     Ty result = static_cast<Ty>(reduce_var);
     kps::WriteData<Ty, 1, 1, 1, false>(
         y + store_offset + idx, &result, block.BlockDimX());
@@ -831,6 +834,10 @@ __global__ void ReduceHigherDimKernel(const Tx* x,
                   kps::details::ReduceMode::kLocalMode>(
           &reduce_var, &reduce_compute, reducer, false);
     }
+    if (is_mean) {
+      reduce_var = reduce_var / static_cast<MPType>(mean_div);
+    }
     Ty result = static_cast<Ty>(reduce_var);
     kps::WriteData<Ty, 1, 1, 1, true>(
         y + store_offset + idx, &result, dim.rem_x);
@@ -848,7 +855,8 @@ static void LaunchReduceKernel(const Tx* x_data,
                                const TransformOp& transform,
                                MPType init,
                                KPStream stream,
-                               ReduceConfig<Ty> config) {
+                               ReduceConfig<Ty> config,
+                               bool is_mean = false) {
   if (config.reduce_type == kReduceLastDim) {
     int stride_reduce = 1;
     int stride_left = config.reduce_num;
@@ -887,7 +895,8 @@ static void LaunchReduceKernel(const Tx* x_data,
         config.reduce_last_dim,
         reduce_index_calculator,
         left_index_calculator,
-        dim);
+        dim,
+        is_mean && (!config.should_reduce_again));
   } else {
     int reduce_rank = config.reduce_strides.size();
@@ -930,7 +939,8 @@ static void LaunchReduceKernel(const Tx* x_data,
         config.reduce_last_dim,
         reduce_index_calculator,
         left_index_calculator,
-        dim);
+        dim,
+        is_mean && (!config.should_reduce_again));
   }

   if (config.should_reduce_again) {
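Both launch sites pass is_mean && (!config.should_reduce_again): when the reduction needs a second kernel pass over the partial results, dividing in the first pass and again in the second would scale the result twice, so the division is deferred to whichever pass is final. A host-side sketch of that divide-exactly-once rule for a two-pass mean (illustrative chunked reduction, not Paddle code):

#include <cstdio>
#include <vector>

float two_pass_mean(const std::vector<float>& x, size_t chunk) {
  std::vector<float> partial;
  for (size_t i = 0; i < x.size(); i += chunk) {
    float s = 0.0f;
    for (size_t j = i; j < i + chunk && j < x.size(); ++j) s += x[j];
    partial.push_back(s);  // pass 1: partial sums only, no division yet
  }
  float total = 0.0f;
  for (float p : partial) total += p;  // pass 2: reduce the partials
  return total / x.size();             // divide once, on the final pass
}

int main() {
  std::vector<float> x{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
  std::printf("%f\n", two_pass_mean(x, 2));  // 3.5
}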
@@ -950,15 +960,18 @@ static void LaunchReduceKernel(const Tx* x_data,
         kps::DimConfig(
             grid.x, grid.y, grid.z, block.x, config.grid.y, 0);
     dim.SetRem(config.left_num % block.x, 0, 0);
 #ifdef PADDLE_WITH_XPU_KP
-    grid = 8;
-    block = 64;
+    int grid_size = 8;
+    int block_size = 64;
+#else
+    auto grid_size = grid;
+    auto block_size = block;
 #endif
     ReduceHigherDimKernel<
         Ty, Ty, MPType, ReduceOp,
-        kps::IdentityFunctor<Ty, MPType>><<<grid, block, 0, stream>>>(
+        kps::IdentityFunctor<Ty, MPType>><<<grid_size, block_size, 0, stream>>>(
         config.output_data,
         y_data,
         reducer,
@@ -967,7 +980,9 @@ static void LaunchReduceKernel(const Tx* x_data,
         config.grid.y,
         config.left_num,
         config.grid.y,
-        dim);
+        dim,
+        config.reduce_num,
+        is_mean);
   }
 }
@@ -1034,7 +1049,8 @@ void ReduceKernel(const KPDevice& dev_ctx,
                   const phi::DenseTensor& x,
                   phi::DenseTensor* y,
                   const TransformOp& transform,
-                  const std::vector<int>& origin_reduce_dims) {
+                  const std::vector<int>& origin_reduce_dims,
+                  bool is_mean = false) {
 #ifdef PADDLE_WITH_XPU_KP
   auto stream = dev_ctx.x_context()->xpu_stream;
 #else
@@ -1069,8 +1085,18 @@ void ReduceKernel(const KPDevice& dev_ctx,
   bool use_cub_reduce = config.reduce_num == numel && !kIsTxFP16;
 #ifndef PADDLE_WITH_XPU_KP
   if (use_cub_reduce) {
-    CubTensorReduceImpl<Tx, Ty, ReduceOp, TransformOp>(
-        x_data, y_data, transform, config.reduce_num, dev_ctx, stream);
+    if (is_mean) {
+      using Div = kps::DivideFunctor<Tx>;
+      CubTensorReduceImpl<Tx, Ty, ReduceOp, Div>(
+          x_data, y_data, Div(config.reduce_num), config.reduce_num,
+          dev_ctx, stream);
+    } else {
+      CubTensorReduceImpl<Tx, Ty, ReduceOp, TransformOp>(
+          x_data, y_data, transform, config.reduce_num, dev_ctx, stream);
+    }
     return;
   }
 #endif
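The CUB fast path applies its transform per input element, so a mean there keeps the old pre-scaling strategy: is_mean swaps the transform for kps::DivideFunctor and the scaled values are summed. A sketch of a reciprocal-multiply divide functor in that spirit (simplified, not the real kps implementation):

#include <cstdio>
#include <vector>

// Divide expressed as a multiply by the precomputed reciprocal of n,
// applied per element before the sum (simplified sketch).
template <typename T>
struct DivideFunctor {
  explicit DivideFunctor(int n) : n_inv(static_cast<T>(1) / n) {}
  T operator()(T v) const { return v * n_inv; }
  T n_inv;
};

int main() {
  std::vector<float> x{2.0f, 4.0f, 6.0f};
  DivideFunctor<float> div(static_cast<int>(x.size()));
  float acc = 0.0f;
  for (float v : x) acc += div(v);  // sum of transformed values is the mean
  std::printf("%f\n", acc);  // 4.0
}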
@@ -1115,7 +1141,9 @@ void ReduceKernel(const KPDevice& dev_ctx,
         config.reduce_num,
         config.left_num,
         config.blocking_size,
-        dim);
+        dim,
+        config.reduce_num,
+        is_mean && (!config.should_reduce_again));

     if (config.should_reduce_again) {
       dim3 block = dim3(config.block.x, 1, 1);
@@ -1125,15 +1153,19 @@ void ReduceKernel(const KPDevice& dev_ctx,
       dim2.SetRem(config.left_num % config.block.x, 0, 0);
 #ifdef PADDLE_WITH_XPU_KP
-      grid = 8;
-      block = 64;
+      int grid_size = 8;
+      int block_size = 64;
+#else
+      auto grid_size = grid;
+      auto block_size = block;
 #endif
       ReduceHigherDimKernel<
           Ty, Ty, MPType, ReduceOp<MPType>,
-          kps::IdentityFunctor<Ty, MPType>><<<grid, block, 0, stream>>>(
+          kps::IdentityFunctor<Ty, MPType>><<<grid_size, block_size, 0,
+                                              stream>>>(
           config.output_data,
           y_data,
           reducer,
@@ -1142,7 +1174,9 @@ void ReduceKernel(const KPDevice& dev_ctx,
           config.grid.y,
           config.left_num,
           config.grid.y,
-          dim2);
+          dim2,
+          config.reduce_num,
+          is_mean);
     }
     return;
   }
@@ -1151,7 +1185,14 @@ void ReduceKernel(const KPDevice& dev_ctx,
   // when reduce_dim.size() != 1 and reduce_dim.size() != x_dim.size(), this
   // function will be used
   LaunchReduceKernel<Tx, Ty, MPType, ReduceOp<MPType>, TransformOp>(
-      x_data, y_data, reducer, transform, reducer.initial(), stream, config);
+      x_data,
+      y_data,
+      reducer,
+      transform,
+      reducer.initial(),
+      stream,
+      config,
+      is_mean);
 }
 }  // namespace funcs
paddle/phi/kernels/gpu/reduce.h
@@ -30,7 +30,8 @@ void Reduce(const KPDevice& dev_ctx,
             const std::vector<int64_t>& dims,
             bool keep_dim,
             DataType out_dtype,
-            DenseTensor* out) {
+            DenseTensor* out,
+            bool is_mean = false) {
   std::vector<int> reduce_dims =
       phi::funcs::details::GetReduceDim(dims, x.dims().size(), reduce_all);
@@ -57,12 +58,18 @@ void Reduce(const KPDevice& dev_ctx,
           tmp_tensor,
           out,
           TransformOp<data_t, MPType>(reduce_num),
-          reduce_dims);
+          reduce_dims,
+          is_mean);
     }));
   } else {
     using MPType = typename kps::details::MPTypeTrait<T>::Type;
     phi::funcs::ReduceKernel<T, T, ReduceOp, TransformOp<T, MPType>>(
-        dev_ctx, x, out, TransformOp<T, MPType>(reduce_num), reduce_dims);
+        dev_ctx,
+        x,
+        out,
+        TransformOp<T, MPType>(reduce_num),
+        reduce_dims,
+        is_mean);
   }
 }
 }  // namespace phi
paddle/phi/kernels/gpu/reduce_kernel.cu
@@ -27,8 +27,8 @@ void MeanRawKernel(const Context& dev_ctx,
                    bool reduce_all,
                    DenseTensor* out) {
   auto out_dtype = x.dtype();
-  phi::Reduce<T, kps::AddFunctor, kps::DivideFunctor>(
-      dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out);
+  phi::Reduce<T, kps::AddFunctor, kps::IdentityFunctor>(
+      dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out, true);
 }

 template <typename T, typename Context>
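For a partial mean the kernels divide by reduce_num, the product of the reduced extents, producing one mean per remaining (left) index. A host sketch of that contract for a [2, 3] tensor reduced over dim 1 (illustrative, not Paddle code):

#include <cstdio>
#include <vector>

int main() {
  const int left = 2, reduce_num = 3;
  std::vector<float> x{1, 2, 3,   // row 0
                       4, 5, 6};  // row 1
  std::vector<float> y(left, 0.0f);
  for (int i = 0; i < left; ++i) {
    float acc = 0.0f;
    for (int j = 0; j < reduce_num; ++j) acc += x[i * reduce_num + j];
    y[i] = acc / reduce_num;  // the is_mean division, once per output
  }
  std::printf("%f %f\n", y[0], y[1]);  // 2.0 5.0
}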