Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
f469f176
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f469f176
编写于
6月 20, 2023
作者:
Y
Yiqun Liu
提交者:
GitHub
6月 20, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Remove reduntant definition of MPTypeTrait. (#54756)
上级
80975d45
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
26 addition
and
44 deletion
+26
-44
paddle/phi/kernels/funcs/dropout_impl.cu.h
paddle/phi/kernels/funcs/dropout_impl.cu.h
+5
-5
paddle/phi/kernels/funcs/reduce_function.h
paddle/phi/kernels/funcs/reduce_function.h
+1
-1
paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu
...e/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu
+2
-2
paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu
paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu
+3
-3
paddle/phi/kernels/gpu/exponential_kernel.cu
paddle/phi/kernels/gpu/exponential_kernel.cu
+1
-1
paddle/phi/kernels/gpu/group_norm_grad_kernel.cu
paddle/phi/kernels/gpu/group_norm_grad_kernel.cu
+2
-2
paddle/phi/kernels/gpu/multinomial_kernel.cu
paddle/phi/kernels/gpu/multinomial_kernel.cu
+1
-1
paddle/phi/kernels/gpu/reduce.h
paddle/phi/kernels/gpu/reduce.h
+3
-3
paddle/phi/kernels/gpu/reduce_amin_amax_common.h
paddle/phi/kernels/gpu/reduce_amin_amax_common.h
+1
-1
paddle/phi/kernels/gpu/reduce_mean_grad_kernel.cu
paddle/phi/kernels/gpu/reduce_mean_grad_kernel.cu
+1
-1
paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu
paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu
+1
-1
paddle/phi/kernels/gpu/rrelu_kernel.cu
paddle/phi/kernels/gpu/rrelu_kernel.cu
+1
-1
paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
+1
-1
paddle/phi/kernels/gpu/uniform_kernel.cu
paddle/phi/kernels/gpu/uniform_kernel.cu
+1
-1
paddle/phi/kernels/legacy/gpu/uniform_kernel.cu
paddle/phi/kernels/legacy/gpu/uniform_kernel.cu
+1
-1
paddle/phi/kernels/primitive/compute_primitives.h
paddle/phi/kernels/primitive/compute_primitives.h
+1
-19
未找到文件。
paddle/phi/kernels/funcs/dropout_impl.cu.h
浏览文件 @
f469f176
...
@@ -40,7 +40,7 @@ namespace funcs {
...
@@ -40,7 +40,7 @@ namespace funcs {
template
<
typename
T
>
template
<
typename
T
>
struct
DstFunctor
{
struct
DstFunctor
{
using
MT
=
typename
phi
::
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
HOSTDEVICE
inline
DstFunctor
(
const
float
retain_prob
,
HOSTDEVICE
inline
DstFunctor
(
const
float
retain_prob
,
const
bool
is_upscale_in_train
,
const
bool
is_upscale_in_train
,
...
@@ -90,7 +90,7 @@ struct MaskFunctor {
...
@@ -90,7 +90,7 @@ struct MaskFunctor {
template
<
typename
T
>
template
<
typename
T
>
struct
DstMaskFunctor
{
struct
DstMaskFunctor
{
using
MT
=
typename
phi
::
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
HOSTDEVICE
inline
DstMaskFunctor
(
const
float
retain_prob
,
HOSTDEVICE
inline
DstMaskFunctor
(
const
float
retain_prob
,
const
bool
is_upscale_in_train
)
const
bool
is_upscale_in_train
)
:
retain_prob_
(
retain_prob
),
is_upscale_in_train_
(
is_upscale_in_train
)
{
:
retain_prob_
(
retain_prob
),
is_upscale_in_train_
(
is_upscale_in_train
)
{
...
@@ -386,7 +386,7 @@ void DropoutFwGPUKernelDriver(
...
@@ -386,7 +386,7 @@ void DropoutFwGPUKernelDriver(
// y = x
// y = x
phi
::
Copy
(
dev_ctx
,
x
,
dev_ctx
.
GetPlace
(),
false
,
y
);
phi
::
Copy
(
dev_ctx
,
x
,
dev_ctx
.
GetPlace
(),
false
,
y
);
}
else
{
}
else
{
using
MT
=
typename
phi
::
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
MT
factor
=
static_cast
<
MT
>
(
1.0
f
-
dropout_prob
);
MT
factor
=
static_cast
<
MT
>
(
1.0
f
-
dropout_prob
);
// y = factor * x
// y = factor * x
ScaleByDropoutFactor
<
T
,
MT
>
(
dev_ctx
,
x
,
y
,
factor
);
ScaleByDropoutFactor
<
T
,
MT
>
(
dev_ctx
,
x
,
y
,
factor
);
...
@@ -396,7 +396,7 @@ void DropoutFwGPUKernelDriver(
...
@@ -396,7 +396,7 @@ void DropoutFwGPUKernelDriver(
template
<
typename
T
>
template
<
typename
T
>
struct
CudaDropoutGradFunctor
{
struct
CudaDropoutGradFunctor
{
using
MT
=
typename
phi
::
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
explicit
CudaDropoutGradFunctor
(
const
MT
factor
)
:
factor_
(
factor
)
{}
explicit
CudaDropoutGradFunctor
(
const
MT
factor
)
:
factor_
(
factor
)
{}
...
@@ -419,7 +419,7 @@ void DropoutGradGPUKernelDriver(const phi::GPUContext& dev_ctx,
...
@@ -419,7 +419,7 @@ void DropoutGradGPUKernelDriver(const phi::GPUContext& dev_ctx,
const
phi
::
DenseTensor
&
mask
,
const
phi
::
DenseTensor
&
mask
,
phi
::
DenseTensor
*
grad_x
,
phi
::
DenseTensor
*
grad_x
,
bool
is_dropout_nd
=
false
)
{
bool
is_dropout_nd
=
false
)
{
using
MT
=
typename
phi
::
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
auto
stream
=
dev_ctx
.
stream
();
auto
stream
=
dev_ctx
.
stream
();
if
(
is_test
)
{
if
(
is_test
)
{
...
...
paddle/phi/kernels/funcs/reduce_function.h
浏览文件 @
f469f176
...
@@ -1047,7 +1047,7 @@ void ReduceKernel(const KPDevice& dev_ctx,
...
@@ -1047,7 +1047,7 @@ void ReduceKernel(const KPDevice& dev_ctx,
}
}
#endif
#endif
using
MPType
=
typename
kps
::
details
::
MPTypeTrait
<
Ty
>::
Type
;
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
Ty
>::
Type
;
auto
reducer
=
ReduceOp
<
MPType
>
();
auto
reducer
=
ReduceOp
<
MPType
>
();
// launch ReduceHigherDimKernel
// launch ReduceHigherDimKernel
// when reduce_dim.size() == 1 and reduce_dim[0] != x_dim.size() - 1, this
// when reduce_dim.size() == 1 and reduce_dim[0] != x_dim.size() - 1, this
...
...
paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu
浏览文件 @
f469f176
...
@@ -62,7 +62,7 @@ __global__ void FuseScaleAddGradRateZero(const T* grad,
...
@@ -62,7 +62,7 @@ __global__ void FuseScaleAddGradRateZero(const T* grad,
template
<
typename
T1
,
typename
T2
=
T1
,
typename
OutT
=
T1
>
template
<
typename
T1
,
typename
T2
=
T1
,
typename
OutT
=
T1
>
struct
NoMaskBwFunctor
{
struct
NoMaskBwFunctor
{
const
float
retain_prob_
;
const
float
retain_prob_
;
using
MT
=
typename
phi
::
kps
::
details
::
MPTypeTrait
<
T1
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T1
>::
Type
;
MT
factor_
;
MT
factor_
;
HOSTDEVICE
inline
NoMaskBwFunctor
(
const
float
retain_prob
)
HOSTDEVICE
inline
NoMaskBwFunctor
(
const
float
retain_prob
)
:
retain_prob_
(
retain_prob
)
{
:
retain_prob_
(
retain_prob
)
{
...
@@ -171,7 +171,7 @@ void FusedDropoutAddGradKernel(const Context& dev_ctx,
...
@@ -171,7 +171,7 @@ void FusedDropoutAddGradKernel(const Context& dev_ctx,
auto
*
y_grad_data
=
dev_ctx
.
template
Alloc
<
T
>(
y_grad
);
auto
*
y_grad_data
=
dev_ctx
.
template
Alloc
<
T
>(
y_grad
);
const
auto
*
out_grad_data
=
out_grad
.
data
<
T
>
();
const
auto
*
out_grad_data
=
out_grad
.
data
<
T
>
();
using
MT
=
typename
phi
::
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
int
blocks
=
NumBlocks
(
numel
);
int
blocks
=
NumBlocks
(
numel
);
int
threads
=
kNumCUDAThreads
;
int
threads
=
kNumCUDAThreads
;
...
...
paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu
浏览文件 @
f469f176
...
@@ -29,7 +29,7 @@ template <typename T1, typename T2 = T1, typename OutT = T1>
...
@@ -29,7 +29,7 @@ template <typename T1, typename T2 = T1, typename OutT = T1>
struct
NoMaskFwFunctor
{
struct
NoMaskFwFunctor
{
const
float
retain_prob_
;
const
float
retain_prob_
;
const
bool
is_upscale_in_train_
;
const
bool
is_upscale_in_train_
;
using
MT
=
typename
phi
::
kps
::
details
::
MPTypeTrait
<
T1
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T1
>::
Type
;
MT
factor
;
MT
factor
;
HOSTDEVICE
inline
NoMaskFwFunctor
(
const
float
retain_prob
,
HOSTDEVICE
inline
NoMaskFwFunctor
(
const
float
retain_prob
,
const
bool
is_upscale_in_train
)
const
bool
is_upscale_in_train
)
...
@@ -59,7 +59,7 @@ struct NoMaskFwFunctor {
...
@@ -59,7 +59,7 @@ struct NoMaskFwFunctor {
template
<
typename
T
>
template
<
typename
T
>
struct
ScaleAddFuctor
{
struct
ScaleAddFuctor
{
using
MT
=
typename
phi
::
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
explicit
ScaleAddFuctor
(
const
MT
factor
,
bool
upscale_in_train
)
explicit
ScaleAddFuctor
(
const
MT
factor
,
bool
upscale_in_train
)
:
factor_
(
factor
),
upscale_in_train_
(
upscale_in_train
)
{}
:
factor_
(
factor
),
upscale_in_train_
(
upscale_in_train
)
{}
...
@@ -206,7 +206,7 @@ void FusedDropoutAddKernel(const Context& dev_ctx,
...
@@ -206,7 +206,7 @@ void FusedDropoutAddKernel(const Context& dev_ctx,
dst_functor
);
dst_functor
);
#undef PD_DROPOUT_KERNEL_NAME
#undef PD_DROPOUT_KERNEL_NAME
}
else
{
}
else
{
using
MT
=
typename
phi
::
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
MT
factor
=
static_cast
<
MT
>
(
1.0
f
-
dropout_rate
);
MT
factor
=
static_cast
<
MT
>
(
1.0
f
-
dropout_rate
);
std
::
vector
<
phi
::
DenseTensor
*>
outs
=
{
out
};
std
::
vector
<
phi
::
DenseTensor
*>
outs
=
{
out
};
std
::
vector
<
const
phi
::
DenseTensor
*>
ins
=
{
&
x
,
&
y
};
std
::
vector
<
const
phi
::
DenseTensor
*>
ins
=
{
&
x
,
&
y
};
...
...
paddle/phi/kernels/gpu/exponential_kernel.cu
浏览文件 @
f469f176
...
@@ -25,7 +25,7 @@ void ExponentialKernel(const Context &dev_ctx,
...
@@ -25,7 +25,7 @@ void ExponentialKernel(const Context &dev_ctx,
const
DenseTensor
&
x
,
const
DenseTensor
&
x
,
float
lambda
,
float
lambda
,
DenseTensor
*
out
)
{
DenseTensor
*
out
)
{
using
MT
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
phi
::
funcs
::
uniform_distribution
<
MT
>
dist
;
phi
::
funcs
::
uniform_distribution
<
MT
>
dist
;
phi
::
funcs
::
exponential_transform
<
MT
>
trans
(
lambda
);
phi
::
funcs
::
exponential_transform
<
MT
>
trans
(
lambda
);
phi
::
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
out
,
dist
,
trans
);
phi
::
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
out
,
dist
,
trans
);
...
...
paddle/phi/kernels/gpu/group_norm_grad_kernel.cu
浏览文件 @
f469f176
...
@@ -107,7 +107,7 @@ __global__ void GroupNormBackward(const T* x,
...
@@ -107,7 +107,7 @@ __global__ void GroupNormBackward(const T* x,
int
group_size
,
int
group_size
,
float
epsilon
,
float
epsilon
,
T
*
d_x
)
{
T
*
d_x
)
{
// using AccT = typename
kps::details
::MPTypeTrait<T>::Type;
// using AccT = typename
phi::dtype
::MPTypeTrait<T>::Type;
int
gid
=
blockIdx
.
y
;
int
gid
=
blockIdx
.
y
;
int
cid
=
blockIdx
.
x
;
int
cid
=
blockIdx
.
x
;
...
@@ -279,7 +279,7 @@ void GroupNormGradKernel(const Context& dev_ctx,
...
@@ -279,7 +279,7 @@ void GroupNormGradKernel(const Context& dev_ctx,
DenseTensor
*
d_x
,
DenseTensor
*
d_x
,
DenseTensor
*
d_scale
,
DenseTensor
*
d_scale
,
DenseTensor
*
d_bias
)
{
DenseTensor
*
d_bias
)
{
using
AccT
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
AccT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
const
DataLayout
data_layout
=
phi
::
StringToDataLayout
(
data_layout_str
);
const
DataLayout
data_layout
=
phi
::
StringToDataLayout
(
data_layout_str
);
const
auto
scale_ptr
=
scale
.
get_ptr
();
const
auto
scale_ptr
=
scale
.
get_ptr
();
const
auto
bias_ptr
=
bias
.
get_ptr
();
const
auto
bias_ptr
=
bias
.
get_ptr
();
...
...
paddle/phi/kernels/gpu/multinomial_kernel.cu
浏览文件 @
f469f176
...
@@ -132,7 +132,7 @@ void MultinomialKernel(const Context& dev_ctx,
...
@@ -132,7 +132,7 @@ void MultinomialKernel(const Context& dev_ctx,
const
Scalar
&
num_samples
,
const
Scalar
&
num_samples
,
bool
replacement
,
bool
replacement
,
DenseTensor
*
out
)
{
DenseTensor
*
out
)
{
using
MT
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
auto
int_num_samples
=
num_samples
.
to
<
int
>
();
auto
int_num_samples
=
num_samples
.
to
<
int
>
();
auto
*
in_data
=
x
.
data
<
T
>
();
auto
*
in_data
=
x
.
data
<
T
>
();
...
...
paddle/phi/kernels/gpu/reduce.h
浏览文件 @
f469f176
...
@@ -55,7 +55,7 @@ void Reduce(const KPDevice& dev_ctx,
...
@@ -55,7 +55,7 @@ void Reduce(const KPDevice& dev_ctx,
out_dtype
,
out_dtype
,
"ReduceKernel"
,
"ReduceKernel"
,
([
&
]
{
([
&
]
{
using
MPType
=
typename
kps
::
details
::
MPTypeTrait
<
data_t
>::
Type
;
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
data_t
>::
Type
;
phi
::
funcs
::
ReduceKernel
<
data_t
,
phi
::
funcs
::
ReduceKernel
<
data_t
,
data_t
,
data_t
,
ReduceOp
,
ReduceOp
,
...
@@ -68,7 +68,7 @@ void Reduce(const KPDevice& dev_ctx,
...
@@ -68,7 +68,7 @@ void Reduce(const KPDevice& dev_ctx,
is_mean
);
is_mean
);
}));
}));
}
else
{
}
else
{
using
MPType
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
phi
::
funcs
::
ReduceKernel
<
T
,
T
,
ReduceOp
,
TransformOp
<
T
,
MPType
>>
(
phi
::
funcs
::
ReduceKernel
<
T
,
T
,
ReduceOp
,
TransformOp
<
T
,
MPType
>>
(
dev_ctx
,
dev_ctx
,
x
,
x
,
...
@@ -78,7 +78,7 @@ void Reduce(const KPDevice& dev_ctx,
...
@@ -78,7 +78,7 @@ void Reduce(const KPDevice& dev_ctx,
is_mean
);
is_mean
);
}
}
#else
#else
using
MPType
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
phi
::
funcs
::
ReduceKernel
<
T
,
T
,
ReduceOp
,
TransformOp
<
T
,
MPType
>>
(
phi
::
funcs
::
ReduceKernel
<
T
,
T
,
ReduceOp
,
TransformOp
<
T
,
MPType
>>
(
dev_ctx
,
dev_ctx
,
x
,
x
,
...
...
paddle/phi/kernels/gpu/reduce_amin_amax_common.h
浏览文件 @
f469f176
...
@@ -81,7 +81,7 @@ void ReduceCudaAMaxAMinGrad(const Context& dev_ctx,
...
@@ -81,7 +81,7 @@ void ReduceCudaAMaxAMinGrad(const Context& dev_ctx,
funcs
::
BroadcastKernel
<
T
>
(
funcs
::
BroadcastKernel
<
T
>
(
dev_ctx
,
equal_inputs
,
&
equal_outputs
,
funcs
::
EqualFunctor
<
T
>
(),
0
);
dev_ctx
,
equal_inputs
,
&
equal_outputs
,
funcs
::
EqualFunctor
<
T
>
(),
0
);
// 2. equal_count = reduceSum(equal_out)
// 2. equal_count = reduceSum(equal_out)
using
MPType
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
phi
::
funcs
::
phi
::
funcs
::
ReduceKernel
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
,
MPType
>>
(
ReduceKernel
<
T
,
T
,
kps
::
AddFunctor
,
kps
::
IdentityFunctor
<
T
,
MPType
>>
(
dev_ctx
,
dev_ctx
,
...
...
paddle/phi/kernels/gpu/reduce_mean_grad_kernel.cu
浏览文件 @
f469f176
...
@@ -52,7 +52,7 @@ void ReduceMeanGradKernel(const Context& dev_ctx,
...
@@ -52,7 +52,7 @@ void ReduceMeanGradKernel(const Context& dev_ctx,
std
::
vector
<
const
DenseTensor
*>
inputs
=
{
&
new_out_grad
};
std
::
vector
<
const
DenseTensor
*>
inputs
=
{
&
new_out_grad
};
std
::
vector
<
DenseTensor
*>
outputs
=
{
x_grad
};
std
::
vector
<
DenseTensor
*>
outputs
=
{
x_grad
};
using
MPType
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
funcs
::
BroadcastKernel
<
T
>
(
funcs
::
BroadcastKernel
<
T
>
(
dev_ctx
,
inputs
,
&
outputs
,
kps
::
DivideFunctor
<
T
,
MPType
>
(
reduce_num
),
0
);
dev_ctx
,
inputs
,
&
outputs
,
kps
::
DivideFunctor
<
T
,
MPType
>
(
reduce_num
),
0
);
}
}
...
...
paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu
浏览文件 @
f469f176
...
@@ -47,7 +47,7 @@ void ReduceSumGradKernel(const Context& dev_ctx,
...
@@ -47,7 +47,7 @@ void ReduceSumGradKernel(const Context& dev_ctx,
// call ReduceGrad
// call ReduceGrad
dev_ctx
.
Alloc
(
x_grad
,
x
.
dtype
());
dev_ctx
.
Alloc
(
x_grad
,
x
.
dtype
());
using
MPType
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
phi
::
ReduceGrad
<
kps
::
IdentityFunctor
<
T
,
MPType
>>
(
phi
::
ReduceGrad
<
kps
::
IdentityFunctor
<
T
,
MPType
>>
(
dev_ctx
,
dev_ctx
,
&
new_out_grad
,
&
new_out_grad
,
...
...
paddle/phi/kernels/gpu/rrelu_kernel.cu
浏览文件 @
f469f176
...
@@ -93,7 +93,7 @@ void RReluKernel(const Context& ctx,
...
@@ -93,7 +93,7 @@ void RReluKernel(const Context& ctx,
RReluTestCudaFunctor
<
T
>
functor
(
x_data
,
out_data
,
noise_data
,
mid_val
);
RReluTestCudaFunctor
<
T
>
functor
(
x_data
,
out_data
,
noise_data
,
mid_val
);
for_range
(
functor
);
for_range
(
functor
);
}
else
{
}
else
{
using
MT
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
funcs
::
uniform_distribution
<
MT
>
dist
;
funcs
::
uniform_distribution
<
MT
>
dist
;
funcs
::
uniform_real_transform
<
MT
>
trans
(
lower
,
upper
);
funcs
::
uniform_real_transform
<
MT
>
trans
(
lower
,
upper
);
funcs
::
distribution_and_transform
<
T
>
(
ctx
,
noise
,
dist
,
trans
);
funcs
::
distribution_and_transform
<
T
>
(
ctx
,
noise
,
dist
,
trans
);
...
...
paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
浏览文件 @
f469f176
...
@@ -67,7 +67,7 @@ void UniformInplaceKernel(const Context& ctx,
...
@@ -67,7 +67,7 @@ void UniformInplaceKernel(const Context& ctx,
ctx
.
template
Alloc
<
T
>(
out
);
ctx
.
template
Alloc
<
T
>(
out
);
if
(
seed
==
0
)
{
if
(
seed
==
0
)
{
// Use global Generator seed
// Use global Generator seed
using
MT
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
funcs
::
uniform_distribution
<
MT
>
dist
;
funcs
::
uniform_distribution
<
MT
>
dist
;
funcs
::
uniform_real_transform
<
MT
>
trans
(
min
,
max
);
funcs
::
uniform_real_transform
<
MT
>
trans
(
min
,
max
);
funcs
::
distribution_and_transform
<
T
>
(
ctx
,
out
,
dist
,
trans
);
funcs
::
distribution_and_transform
<
T
>
(
ctx
,
out
,
dist
,
trans
);
...
...
paddle/phi/kernels/gpu/uniform_kernel.cu
浏览文件 @
f469f176
...
@@ -65,7 +65,7 @@ void UniformKernel(const Context& dev_ctx,
...
@@ -65,7 +65,7 @@ void UniformKernel(const Context& dev_ctx,
dev_ctx
.
template
Alloc
<
T
>(
out
);
dev_ctx
.
template
Alloc
<
T
>(
out
);
if
(
seed
==
0
)
{
if
(
seed
==
0
)
{
// Use global Generator seed
// Use global Generator seed
using
MT
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
funcs
::
uniform_distribution
<
MT
>
dist
;
funcs
::
uniform_distribution
<
MT
>
dist
;
funcs
::
uniform_real_transform
<
MT
>
trans
(
min
.
to
<
float
>
(),
max
.
to
<
float
>
());
funcs
::
uniform_real_transform
<
MT
>
trans
(
min
.
to
<
float
>
(),
max
.
to
<
float
>
());
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
out
,
dist
,
trans
);
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
out
,
dist
,
trans
);
...
...
paddle/phi/kernels/legacy/gpu/uniform_kernel.cu
浏览文件 @
f469f176
...
@@ -68,7 +68,7 @@ void UniformRawKernel(const Context& dev_ctx,
...
@@ -68,7 +68,7 @@ void UniformRawKernel(const Context& dev_ctx,
dev_ctx
.
template
Alloc
<
T
>(
out
);
dev_ctx
.
template
Alloc
<
T
>(
out
);
if
(
seed
==
0
)
{
if
(
seed
==
0
)
{
// Use global Generator seed
// Use global Generator seed
using
MT
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
funcs
::
uniform_distribution
<
MT
>
dist
;
funcs
::
uniform_distribution
<
MT
>
dist
;
funcs
::
uniform_real_transform
<
MT
>
trans
(
min
.
to
<
float
>
(),
max
.
to
<
float
>
());
funcs
::
uniform_real_transform
<
MT
>
trans
(
min
.
to
<
float
>
(),
max
.
to
<
float
>
());
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
out
,
dist
,
trans
);
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
out
,
dist
,
trans
);
...
...
paddle/phi/kernels/primitive/compute_primitives.h
浏览文件 @
f469f176
...
@@ -22,7 +22,7 @@
...
@@ -22,7 +22,7 @@
#endif
#endif
#include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/common/
float16
.h"
#include "paddle/phi/common/
amp_type_traits
.h"
namespace
phi
{
namespace
phi
{
namespace
kps
{
namespace
kps
{
...
@@ -40,24 +40,6 @@ constexpr int kWarpSize = 32;
...
@@ -40,24 +40,6 @@ constexpr int kWarpSize = 32;
// kLocalMode: thread reduce, each thread gets an output;
// kLocalMode: thread reduce, each thread gets an output;
enum
ReduceMode
{
kGlobalMode
,
kLocalMode
};
enum
ReduceMode
{
kGlobalMode
,
kLocalMode
};
template
<
typename
T
>
class
MPTypeTrait
{
public:
using
Type
=
T
;
};
template
<
>
class
MPTypeTrait
<
phi
::
dtype
::
float16
>
{
public:
using
Type
=
float
;
};
template
<
>
class
MPTypeTrait
<
phi
::
dtype
::
bfloat16
>
{
public:
using
Type
=
float
;
};
/**
/**
* @brief Will be used in BlockYReduce, get the index of reduce_num in shared
* @brief Will be used in BlockYReduce, get the index of reduce_num in shared
* memory.
* memory.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录