Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
6354f81c
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6354f81c
编写于
2月 09, 2022
作者:
Y
Yiqun Liu
提交者:
GitHub
2月 09, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Rename partial function name TensorReduceFunctorImpl to TensorReduceImpl. (#39387)
上级
d7dddf94
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
37 addition
and
62 deletion
+37
-62
paddle/fluid/operators/reduce_ops/reduce_op.cu.h
paddle/fluid/operators/reduce_ops/reduce_op.cu.h
+1
-10
paddle/pten/kernels/gpu/elementwise.h
paddle/pten/kernels/gpu/elementwise.h
+4
-16
paddle/pten/kernels/gpu/reduce.h
paddle/pten/kernels/gpu/reduce.h
+31
-32
paddle/pten/kernels/impl/matmul_grad_kernel_impl.h
paddle/pten/kernels/impl/matmul_grad_kernel_impl.h
+1
-4
未找到文件。
paddle/fluid/operators/reduce_ops/reduce_op.cu.h
浏览文件 @
6354f81c
...
@@ -20,15 +20,6 @@
...
@@ -20,15 +20,6 @@
#include <set>
#include <set>
#include <vector>
#include <vector>
#ifdef __NVCC__
#include "cub/cub.cuh"
#endif
#ifdef __HIPCC__
#include <hipcub/hipcub.hpp>
namespace
cub
=
hipcub
;
#endif
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/dense_tensor.h"
...
@@ -46,7 +37,7 @@ void TensorReduceImpl(const platform::CUDADeviceContext& dev_ctx,
...
@@ -46,7 +37,7 @@ void TensorReduceImpl(const platform::CUDADeviceContext& dev_ctx,
gpuStream_t
stream
)
{
gpuStream_t
stream
)
{
y
->
mutable_data
<
Ty
>
(
x
.
place
());
y
->
mutable_data
<
Ty
>
(
x
.
place
());
pten
::
kernels
::
TensorReduce
Functor
Impl
<
Tx
,
Ty
,
ReduceOp
,
TransformOp
>
(
pten
::
kernels
::
TensorReduceImpl
<
Tx
,
Ty
,
ReduceOp
,
TransformOp
>
(
static_cast
<
const
pten
::
GPUContext
&>
(
dev_ctx
),
x
,
y
,
transform
,
static_cast
<
const
pten
::
GPUContext
&>
(
dev_ctx
),
x
,
y
,
transform
,
origin_reduce_dims
,
stream
);
origin_reduce_dims
,
stream
);
}
}
...
...
paddle/pten/kernels/gpu/elementwise.h
浏览文件 @
6354f81c
...
@@ -2016,10 +2016,7 @@ void default_elementwise_add_grad(const GPUContext &ctx,
...
@@ -2016,10 +2016,7 @@ void default_elementwise_add_grad(const GPUContext &ctx,
std
::
vector
<
int
>
reduce_dims
=
std
::
vector
<
int
>
reduce_dims
=
funcs
::
GetReduceDim
(
x
.
dims
(),
out
.
dims
(),
axis
);
funcs
::
GetReduceDim
(
x
.
dims
(),
out
.
dims
(),
axis
);
gpuStream_t
stream
=
ctx
.
stream
();
gpuStream_t
stream
=
ctx
.
stream
();
kernels
::
TensorReduceFunctorImpl
<
T
,
kernels
::
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
ctx
,
dout
,
dx
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
ctx
,
dout
,
dx
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
}
}
}
}
...
@@ -2034,10 +2031,7 @@ void default_elementwise_add_grad(const GPUContext &ctx,
...
@@ -2034,10 +2031,7 @@ void default_elementwise_add_grad(const GPUContext &ctx,
std
::
vector
<
int
>
reduce_dims
=
std
::
vector
<
int
>
reduce_dims
=
funcs
::
GetReduceDim
(
y
.
dims
(),
out
.
dims
(),
axis
);
funcs
::
GetReduceDim
(
y
.
dims
(),
out
.
dims
(),
axis
);
gpuStream_t
stream
=
ctx
.
stream
();
gpuStream_t
stream
=
ctx
.
stream
();
kernels
::
TensorReduceFunctorImpl
<
T
,
kernels
::
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
ctx
,
dout
,
dy
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
ctx
,
dout
,
dy
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
}
}
}
}
...
@@ -2133,10 +2127,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
...
@@ -2133,10 +2127,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
std
::
vector
<
int
>
reduce_dims
=
std
::
vector
<
int
>
reduce_dims
=
funcs
::
GetReduceDim
(
x
.
dims
(),
out
.
dims
(),
axis
);
funcs
::
GetReduceDim
(
x
.
dims
(),
out
.
dims
(),
axis
);
gpuStream_t
stream
=
ctx
.
stream
();
gpuStream_t
stream
=
ctx
.
stream
();
kernels
::
TensorReduceFunctorImpl
<
T
,
kernels
::
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
ctx
,
dout
,
dx
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
ctx
,
dout
,
dx
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
}
}
}
}
...
@@ -2157,10 +2148,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
...
@@ -2157,10 +2148,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
std
::
vector
<
int
>
reduce_dims
=
std
::
vector
<
int
>
reduce_dims
=
funcs
::
GetReduceDim
(
y
.
dims
(),
out
.
dims
(),
axis
);
funcs
::
GetReduceDim
(
y
.
dims
(),
out
.
dims
(),
axis
);
gpuStream_t
stream
=
ctx
.
stream
();
gpuStream_t
stream
=
ctx
.
stream
();
kernels
::
TensorReduceFunctorImpl
<
T
,
kernels
::
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
InverseFunctor
<
T
>>
(
T
,
kps
::
AddFunctor
,
kps
::
InverseFunctor
<
T
>>
(
ctx
,
dout
,
dy
,
kps
::
InverseFunctor
<
T
>
(),
reduce_dims
,
stream
);
ctx
,
dout
,
dy
,
kps
::
InverseFunctor
<
T
>
(),
reduce_dims
,
stream
);
}
}
}
}
...
...
paddle/pten/kernels/gpu/reduce.h
浏览文件 @
6354f81c
...
@@ -1007,12 +1007,12 @@ template <typename Tx,
...
@@ -1007,12 +1007,12 @@ template <typename Tx,
static
static
typename
std
::
enable_if
<!
std
::
is_same
<
Tx
,
paddle
::
platform
::
float16
>::
value
,
typename
std
::
enable_if
<!
std
::
is_same
<
Tx
,
paddle
::
platform
::
float16
>::
value
,
void
>::
type
void
>::
type
CubTensorReduce
Functor
Impl
(
const
Tx
*
x_data
,
CubTensorReduceImpl
(
const
Tx
*
x_data
,
Ty
*
y_data
,
Ty
*
y_data
,
const
TransformOp
&
transform
,
const
TransformOp
&
transform
,
int
reduce_num
,
int
reduce_num
,
const
paddle
::
platform
::
Place
&
place
,
const
paddle
::
platform
::
Place
&
place
,
gpuStream_t
stream
)
{
gpuStream_t
stream
)
{
auto
reducer
=
ReduceOp
<
Ty
>
();
auto
reducer
=
ReduceOp
<
Ty
>
();
cub
::
TransformInputIterator
<
Ty
,
TransformOp
,
const
Tx
*>
trans_x
(
x_data
,
cub
::
TransformInputIterator
<
Ty
,
TransformOp
,
const
Tx
*>
trans_x
(
x_data
,
transform
);
transform
);
...
@@ -1051,12 +1051,12 @@ template <typename Tx,
...
@@ -1051,12 +1051,12 @@ template <typename Tx,
static
static
typename
std
::
enable_if
<
std
::
is_same
<
Tx
,
paddle
::
platform
::
float16
>::
value
,
typename
std
::
enable_if
<
std
::
is_same
<
Tx
,
paddle
::
platform
::
float16
>::
value
,
void
>::
type
void
>::
type
CubTensorReduce
Functor
Impl
(
const
Tx
*
x_data
,
CubTensorReduceImpl
(
const
Tx
*
x_data
,
Ty
*
y_data
,
Ty
*
y_data
,
const
TransformOp
&
transform
,
const
TransformOp
&
transform
,
int
reduce_num
,
int
reduce_num
,
const
paddle
::
platform
::
Place
&
place
,
const
paddle
::
platform
::
Place
&
place
,
gpuStream_t
stream
)
{
gpuStream_t
stream
)
{
PADDLE_THROW
(
pten
::
errors
::
InvalidArgument
(
PADDLE_THROW
(
pten
::
errors
::
InvalidArgument
(
"Tx should not be float16 when using cub::DeviceReduce::Reduce()."
));
"Tx should not be float16 when using cub::DeviceReduce::Reduce()."
));
}
}
...
@@ -1065,12 +1065,12 @@ template <typename Tx,
...
@@ -1065,12 +1065,12 @@ template <typename Tx,
typename
Ty
,
typename
Ty
,
template
<
typename
>
class
ReduceOp
,
template
<
typename
>
class
ReduceOp
,
typename
TransformOp
>
typename
TransformOp
>
void
TensorReduce
Functor
Impl
(
const
pten
::
GPUContext
&
dev_ctx
,
void
TensorReduceImpl
(
const
pten
::
GPUContext
&
dev_ctx
,
const
pten
::
DenseTensor
&
x
,
const
pten
::
DenseTensor
&
x
,
pten
::
DenseTensor
*
y
,
pten
::
DenseTensor
*
y
,
const
TransformOp
&
transform
,
const
TransformOp
&
transform
,
const
std
::
vector
<
int
>&
origin_reduce_dims
,
const
std
::
vector
<
int
>&
origin_reduce_dims
,
gpuStream_t
stream
)
{
gpuStream_t
stream
)
{
y
->
mutable_data
<
Ty
>
(
x
.
place
());
y
->
mutable_data
<
Ty
>
(
x
.
place
());
auto
x_dim
=
pten
::
framework
::
vectorize
<
int
>
(
x
.
dims
());
auto
x_dim
=
pten
::
framework
::
vectorize
<
int
>
(
x
.
dims
());
...
@@ -1102,7 +1102,7 @@ void TensorReduceFunctorImpl(const pten::GPUContext& dev_ctx,
...
@@ -1102,7 +1102,7 @@ void TensorReduceFunctorImpl(const pten::GPUContext& dev_ctx,
constexpr
bool
kIsTxFP16
=
std
::
is_same
<
Tx
,
paddle
::
platform
::
float16
>::
value
;
constexpr
bool
kIsTxFP16
=
std
::
is_same
<
Tx
,
paddle
::
platform
::
float16
>::
value
;
bool
use_cub_reduce
=
config
.
reduce_num
==
numel
&&
!
kIsTxFP16
;
bool
use_cub_reduce
=
config
.
reduce_num
==
numel
&&
!
kIsTxFP16
;
if
(
use_cub_reduce
)
{
if
(
use_cub_reduce
)
{
CubTensorReduce
Functor
Impl
<
Tx
,
Ty
,
ReduceOp
,
TransformOp
>
(
CubTensorReduceImpl
<
Tx
,
Ty
,
ReduceOp
,
TransformOp
>
(
x_data
,
y_data
,
transform
,
config
.
reduce_num
,
x
.
place
(),
stream
);
x_data
,
y_data
,
transform
,
config
.
reduce_num
,
x
.
place
(),
stream
);
return
;
return
;
}
}
...
@@ -1239,13 +1239,13 @@ void Reduce(const GPUContext& dev_ctx,
...
@@ -1239,13 +1239,13 @@ void Reduce(const GPUContext& dev_ctx,
pten
::
DataType
::
INT64
,
pten
::
DataType
::
INT64
,
pten
::
DataType
::
FLOAT16
,
pten
::
DataType
::
FLOAT16
,
out_dtype
,
out_dtype
,
"TensorReduce
Functor
Impl"
,
"TensorReduceImpl"
,
([
&
]
{
([
&
]
{
using
MPType
=
typename
kps
::
details
::
MPTypeTrait
<
data_t
>::
Type
;
using
MPType
=
typename
kps
::
details
::
MPTypeTrait
<
data_t
>::
Type
;
pten
::
kernels
::
TensorReduce
Functor
Impl
<
data_t
,
pten
::
kernels
::
TensorReduceImpl
<
data_t
,
data_t
,
data_t
,
ReduceOp
,
ReduceOp
,
TransformOp
<
data_t
,
MPType
>>
(
TransformOp
<
data_t
,
MPType
>>
(
dev_ctx
,
dev_ctx
,
tmp_tensor
,
tmp_tensor
,
out
,
out
,
...
@@ -1255,14 +1255,13 @@ void Reduce(const GPUContext& dev_ctx,
...
@@ -1255,14 +1255,13 @@ void Reduce(const GPUContext& dev_ctx,
}));
}));
}
else
{
}
else
{
using
MPType
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MPType
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
pten
::
kernels
::
pten
::
kernels
::
TensorReduceImpl
<
T
,
T
,
ReduceOp
,
TransformOp
<
T
,
MPType
>>
(
TensorReduceFunctorImpl
<
T
,
T
,
ReduceOp
,
TransformOp
<
T
,
MPType
>>
(
dev_ctx
,
dev_ctx
,
x
,
x
,
out
,
out
,
TransformOp
<
T
,
MPType
>
(
reduce_num
),
TransformOp
<
T
,
MPType
>
(
reduce_num
),
reduce_dims
,
reduce_dims
,
stream
);
stream
);
}
}
}
}
}
// namespace pten
}
// namespace pten
...
...
paddle/pten/kernels/impl/matmul_grad_kernel_impl.h
浏览文件 @
6354f81c
...
@@ -60,10 +60,7 @@ struct ReduceSumForMatmulGrad<GPUContext, T> {
...
@@ -60,10 +60,7 @@ struct ReduceSumForMatmulGrad<GPUContext, T> {
DenseTensor
*
output
,
DenseTensor
*
output
,
const
std
::
vector
<
int
>&
reduce_dims
)
{
const
std
::
vector
<
int
>&
reduce_dims
)
{
auto
stream
=
dev_ctx
.
stream
();
auto
stream
=
dev_ctx
.
stream
();
kernels
::
TensorReduceFunctorImpl
<
T
,
kernels
::
TensorReduceImpl
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
>>
(
dev_ctx
,
input
,
output
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
dev_ctx
,
input
,
output
,
kps
::
IdentityFunctor
<
T
>
(),
reduce_dims
,
stream
);
}
}
};
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录