Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
eb38c85f
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
eb38c85f
编写于
4月 04, 2023
作者:
H
huangjiyi
提交者:
GitHub
4月 04, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
register fluid kerenls to phi [part 5] (#52486)
* update * fix bug * update * fix bug
上级
5bac67d4
变更
58
隐藏空白更改
内联
并排
Showing
58 changed file
with
568 addition
and
402 deletion
+568
-402
paddle/fluid/operators/controlflow/fetch_v2_op.cc
paddle/fluid/operators/controlflow/fetch_v2_op.cc
+17
-25
paddle/fluid/operators/fc_op.cc
paddle/fluid/operators/fc_op.cc
+3
-3
paddle/fluid/operators/fc_op.cu.cc
paddle/fluid/operators/fc_op.cu.cc
+3
-4
paddle/fluid/operators/fc_op.h
paddle/fluid/operators/fc_op.h
+1
-1
paddle/fluid/operators/fill_zeros_like_op.cc
paddle/fluid/operators/fill_zeros_like_op.cc
+25
-21
paddle/fluid/operators/fill_zeros_like_op.cu.cc
paddle/fluid/operators/fill_zeros_like_op.cu.cc
+27
-23
paddle/fluid/operators/fill_zeros_like_op.h
paddle/fluid/operators/fill_zeros_like_op.h
+4
-1
paddle/fluid/operators/filter_by_instag_op.cc
paddle/fluid/operators/filter_by_instag_op.cc
+17
-11
paddle/fluid/operators/filter_by_instag_op.cu
paddle/fluid/operators/filter_by_instag_op.cu
+19
-13
paddle/fluid/operators/filter_by_instag_op.h
paddle/fluid/operators/filter_by_instag_op.h
+2
-2
paddle/fluid/operators/fsp_op.cc
paddle/fluid/operators/fsp_op.cc
+5
-6
paddle/fluid/operators/fsp_op.cu
paddle/fluid/operators/fsp_op.cu
+4
-7
paddle/fluid/operators/fsp_op.h
paddle/fluid/operators/fsp_op.h
+2
-2
paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc
paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc
+4
-2
paddle/fluid/operators/fused/fused_attention_op.cu
paddle/fluid/operators/fused/fused_attention_op.cu
+17
-10
paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu
...rators/fused/fused_bias_dropout_residual_layer_norm_op.cu
+17
-11
paddle/fluid/operators/fused/fused_bn_activation_op.cu
paddle/fluid/operators/fused/fused_bn_activation_op.cu
+26
-14
paddle/fluid/operators/fused/fused_bn_activation_op.h
paddle/fluid/operators/fused/fused_bn_activation_op.h
+2
-2
paddle/fluid/operators/fused/fused_bn_add_activation_op.cu
paddle/fluid/operators/fused/fused_bn_add_activation_op.cu
+22
-10
paddle/fluid/operators/fused/fused_bn_add_activation_op.h
paddle/fluid/operators/fused/fused_bn_add_activation_op.h
+2
-2
paddle/fluid/operators/fused/fused_elemwise_activation_op.cc
paddle/fluid/operators/fused/fused_elemwise_activation_op.cc
+26
-18
paddle/fluid/operators/fused/fused_elemwise_activation_op.cu
paddle/fluid/operators/fused/fused_elemwise_activation_op.cu
+31
-27
paddle/fluid/operators/fused/fused_elemwise_activation_op.h
paddle/fluid/operators/fused/fused_elemwise_activation_op.h
+11
-2
paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu
...d/operators/fused/fused_embedding_eltwise_layernorm_op.cu
+13
-9
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
+7
-6
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
+12
-6
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
+2
-2
paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu
...luid/operators/fused/fused_fc_elementwise_layernorm_op.cu
+10
-6
paddle/fluid/operators/fused/fused_feedforward_op.cu
paddle/fluid/operators/fused/fused_feedforward_op.cu
+18
-13
paddle/fluid/operators/fused/fused_gate_attention_op.cu
paddle/fluid/operators/fused/fused_gate_attention_op.cu
+32
-20
paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu
paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu
+29
-19
paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu
.../fluid/operators/fused/fused_multi_transformer_int8_op.cu
+7
-4
paddle/fluid/operators/fused/fused_multi_transformer_op.cu
paddle/fluid/operators/fused/fused_multi_transformer_op.cu
+8
-5
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
+10
-4
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
+10
-7
paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
+2
-2
paddle/fluid/operators/fused/fusion_conv_inception_op.cu
paddle/fluid/operators/fused/fusion_conv_inception_op.cu
+12
-6
paddle/fluid/operators/fused/fusion_group_op.cu.cc
paddle/fluid/operators/fused/fusion_group_op.cu.cc
+7
-4
paddle/fluid/operators/fused/fusion_group_op.h
paddle/fluid/operators/fused/fusion_group_op.h
+1
-1
paddle/fluid/operators/fused/fusion_group_op_test.cc
paddle/fluid/operators/fused/fusion_group_op_test.cc
+2
-1
paddle/fluid/operators/fused/fusion_gru_op.cc
paddle/fluid/operators/fused/fusion_gru_op.cc
+3
-6
paddle/fluid/operators/fused/fusion_lstm_op.cc
paddle/fluid/operators/fused/fusion_lstm_op.cc
+3
-5
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
+7
-4
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
...le/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
+7
-5
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
...le/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
+7
-5
paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
+7
-4
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
+7
-4
paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc
paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc
+7
-4
paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc
.../operators/fused/fusion_transpose_flatten_concat_op.cu.cc
+8
-4
paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc
...e/fluid/operators/fused_softmax_mask_upper_triangle_op.cc
+7
-4
paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu
...e/fluid/operators/fused_softmax_mask_upper_triangle_op.cu
+21
-15
paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.h
...le/fluid/operators/fused_softmax_mask_upper_triangle_op.h
+1
-1
paddle/fluid/operators/fused_token_prune_op.cu
paddle/fluid/operators/fused_token_prune_op.cu
+7
-4
paddle/fluid/operators/optimizers/ftrl_op.cc
paddle/fluid/operators/optimizers/ftrl_op.cc
+2
-1
paddle/fluid/operators/optimizers/ftrl_op.cu
paddle/fluid/operators/optimizers/ftrl_op.cu
+1
-1
paddle/fluid/operators/optimizers/ftrl_op.h
paddle/fluid/operators/optimizers/ftrl_op.h
+1
-1
paddle/fluid/operators/string/faster_tokenizer_op.cc
paddle/fluid/operators/string/faster_tokenizer_op.cc
+2
-1
paddle/fluid/operators/string/faster_tokenizer_op.h
paddle/fluid/operators/string/faster_tokenizer_op.h
+1
-1
未找到文件。
paddle/fluid/operators/controlflow/fetch_v2_op.cc
浏览文件 @
eb38c85f
...
...
@@ -116,6 +116,7 @@ class FetchV2Op : public framework::OperatorWithKernel {
}
};
template
<
typename
T
,
typename
DeviceContext
>
class
FetchV2Kernel
{
public:
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
)
const
{
...
...
@@ -228,28 +229,19 @@ REGISTER_OPERATOR(
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OP_CPU_KERNEL_FUNCTOR
(
fetch_v2
,
float
,
ops
::
FetchV2Kernel
,
double
,
ops
::
FetchV2Kernel
,
int8_t
,
ops
::
FetchV2Kernel
,
uint8_t
,
ops
::
FetchV2Kernel
,
int
,
ops
::
FetchV2Kernel
,
int64_t
,
ops
::
FetchV2Kernel
,
bool
,
ops
::
FetchV2Kernel
,
paddle
::
platform
::
bfloat16
,
ops
::
FetchV2Kernel
,
paddle
::
platform
::
complex
<
float
>
,
ops
::
FetchV2Kernel
,
paddle
::
platform
::
complex
<
double
>
,
ops
::
FetchV2Kernel
,
plat
::
float16
,
ops
::
FetchV2Kernel
,
int16_t
,
ops
::
FetchV2Kernel
);
PD_REGISTER_STRUCT_KERNEL
(
fetch_v2
,
CPU
,
ALL_LAYOUT
,
ops
::
FetchV2Kernel
,
float
,
double
,
int
,
int8_t
,
int16_t
,
int64_t
,
uint8_t
,
bool
,
plat
::
float16
,
plat
::
bfloat16
,
plat
::
complex
<
float
>
,
plat
::
complex
<
double
>
)
{}
paddle/fluid/operators/fc_op.cc
浏览文件 @
eb38c85f
...
...
@@ -206,6 +206,6 @@ REGISTER_OPERATOR(
ops
::
FCOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OP_CPU_KERNEL
(
fc
,
ops
::
FCOpKernel
<
phi
::
CPUContext
,
float
>
,
ops
::
FCOpKernel
<
phi
::
CPUContext
,
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fc
,
CPU
,
ALL_LAYOUT
,
ops
::
FCOpKernel
,
float
,
double
)
{
}
paddle/fluid/operators/fc_op.cu.cc
浏览文件 @
eb38c85f
...
...
@@ -15,7 +15,6 @@ limitations under the License. */
#include "paddle/fluid/operators/fc_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
fc
,
ops
::
FCOpKernel
<
phi
::
GPUContext
,
phi
::
dtype
::
float16
>
,
ops
::
FCOpKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FCOpKernel
<
phi
::
GPUContext
,
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fc
,
GPU
,
ALL_LAYOUT
,
ops
::
FCOpKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{}
paddle/fluid/operators/fc_op.h
浏览文件 @
eb38c85f
...
...
@@ -51,7 +51,7 @@ inline void FCOutputSize(const framework::DDim& in_dims,
out_dims
.
push_back
(
w_dims1
);
}
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FCOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
paddle/fluid/operators/fill_zeros_like_op.cc
浏览文件 @
eb38c85f
...
...
@@ -80,6 +80,8 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(FillZerosLikeOp2NoNeedBufferVarsInferer,
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_WITHOUT_GRADIENT
(
fill_zeros_like
,
ops
::
FillZerosLikeOp
,
ops
::
FillZerosLikeOpMaker
);
...
...
@@ -92,24 +94,26 @@ REGISTER_OPERATOR(
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OP_CPU_KERNEL
(
fill_zeros_like
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
int
>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
bool
>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
paddle
::
platform
::
complex
<
float
>>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
paddle
::
platform
::
complex
<
double
>>
);
REGISTER_OP_CPU_KERNEL
(
fill_zeros_like2
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
int
>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
bool
>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
paddle
::
platform
::
complex
<
float
>>
,
ops
::
FillZerosLikeKernel
<
phi
::
CPUContext
,
paddle
::
platform
::
complex
<
double
>>
);
PD_REGISTER_STRUCT_KERNEL
(
fill_zeros_like
,
CPU
,
ALL_LAYOUT
,
ops
::
FillZerosLikeKernel
,
int
,
int64_t
,
float
,
double
,
bool
,
plat
::
complex
<
float
>
,
plat
::
complex
<
double
>
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fill_zeros_like2
,
CPU
,
ALL_LAYOUT
,
ops
::
FillZerosLikeKernel2
,
int
,
int64_t
,
float
,
double
,
bool
,
plat
::
complex
<
float
>
,
plat
::
complex
<
double
>
)
{}
paddle/fluid/operators/fill_zeros_like_op.cu.cc
浏览文件 @
eb38c85f
...
...
@@ -19,26 +19,30 @@ limitations under the License. */
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
fill_zeros_like
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
int
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
bool
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
complex
<
float
>>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
complex
<
double
>>
);
REGISTER_OP_CUDA_KERNEL
(
fill_zeros_like2
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
int
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
bool
>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
complex
<
float
>>
,
ops
::
FillZerosLikeKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
complex
<
double
>>
);
namespace
plat
=
paddle
::
platform
;
PD_REGISTER_STRUCT_KERNEL
(
fill_zeros_like
,
GPU
,
ALL_LAYOUT
,
ops
::
FillZerosLikeKernel
,
int
,
int64_t
,
float
,
double
,
plat
::
float16
,
bool
,
plat
::
complex
<
float
>
,
plat
::
complex
<
double
>
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fill_zeros_like2
,
GPU
,
ALL_LAYOUT
,
ops
::
FillZerosLikeKernel2
,
int
,
int64_t
,
float
,
double
,
plat
::
float16
,
bool
,
plat
::
complex
<
float
>
,
plat
::
complex
<
double
>
)
{}
paddle/fluid/operators/fill_zeros_like_op.h
浏览文件 @
eb38c85f
...
...
@@ -19,7 +19,7 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FillZerosLikeKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
@@ -33,5 +33,8 @@ class FillZerosLikeKernel : public framework::OpKernel<T> {
}
};
template
<
typename
T
,
typename
DeviceContext
>
class
FillZerosLikeKernel2
:
public
FillZerosLikeKernel
<
T
,
DeviceContext
>
{};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/filter_by_instag_op.cc
浏览文件 @
eb38c85f
...
...
@@ -162,14 +162,20 @@ REGISTER_OPERATOR(filter_by_instag,
REGISTER_OPERATOR
(
filter_by_instag_grad
,
ops
::
FilterByInstagOpGrad
);
REGISTER_OP_CPU_KERNEL
(
filter_by_instag
,
ops
::
FilterByInstagKernel
<
float
>
,
ops
::
FilterByInstagKernel
<
double
>
,
ops
::
FilterByInstagKernel
<
int32_t
>
,
ops
::
FilterByInstagKernel
<
int64_t
>
);
REGISTER_OP_CPU_KERNEL
(
filter_by_instag_grad
,
ops
::
FilterByInstagGradKernel
<
float
>
,
ops
::
FilterByInstagGradKernel
<
double
>
,
ops
::
FilterByInstagGradKernel
<
int32_t
>
,
ops
::
FilterByInstagGradKernel
<
int64_t
>
);
PD_REGISTER_STRUCT_KERNEL
(
filter_by_instag
,
CPU
,
ALL_LAYOUT
,
ops
::
FilterByInstagKernel
,
float
,
double
,
int32_t
,
int64_t
)
{}
PD_REGISTER_STRUCT_KERNEL
(
filter_by_instag_grad
,
CPU
,
ALL_LAYOUT
,
ops
::
FilterByInstagGradKernel
,
float
,
double
,
int32_t
,
int64_t
)
{}
paddle/fluid/operators/filter_by_instag_op.cu
浏览文件 @
eb38c85f
...
...
@@ -325,7 +325,7 @@ __global__ void copy_grad_kernel(const size_t N,
#endif
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FilterByInstagGPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
@@ -553,7 +553,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
}
};
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FilterByInstagGradGPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
@@ -620,14 +620,20 @@ class FilterByInstagGradGPUKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
filter_by_instag
,
ops
::
FilterByInstagGPUKernel
<
float
>
,
ops
::
FilterByInstagGPUKernel
<
double
>
,
ops
::
FilterByInstagGPUKernel
<
int32_t
>
,
ops
::
FilterByInstagGPUKernel
<
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
filter_by_instag_grad
,
ops
::
FilterByInstagGradGPUKernel
<
float
>
,
ops
::
FilterByInstagGradGPUKernel
<
double
>
,
ops
::
FilterByInstagGradGPUKernel
<
int32_t
>
,
ops
::
FilterByInstagGradGPUKernel
<
int64_t
>
);
PD_REGISTER_STRUCT_KERNEL
(
filter_by_instag
,
GPU
,
ALL_LAYOUT
,
ops
::
FilterByInstagGPUKernel
,
float
,
double
,
int32_t
,
int64_t
)
{}
PD_REGISTER_STRUCT_KERNEL
(
filter_by_instag_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FilterByInstagGradGPUKernel
,
float
,
double
,
int32_t
,
int64_t
)
{}
paddle/fluid/operators/filter_by_instag_op.h
浏览文件 @
eb38c85f
...
...
@@ -34,7 +34,7 @@ using SelectedRows = phi::SelectedRows;
template
<
typename
T
>
using
Vector
=
phi
::
Vector
<
T
>
;
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FilterByInstagKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
@@ -191,7 +191,7 @@ class FilterByInstagKernel : public framework::OpKernel<T> {
}
};
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FilterByInstagGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
paddle/fluid/operators/fsp_op.cc
浏览文件 @
eb38c85f
...
...
@@ -164,9 +164,8 @@ REGISTER_OPERATOR(fsp,
ops
::
FSPGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
ops
::
FSPGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OPERATOR
(
fsp_grad
,
ops
::
FSPOpGrad
);
REGISTER_OP_CPU_KERNEL
(
fsp
,
ops
::
FSPOpKernel
<
phi
::
CPUContext
,
float
>
,
ops
::
FSPOpKernel
<
phi
::
CPUContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
fsp_grad
,
ops
::
FSPGradOpKernel
<
phi
::
CPUContext
,
float
>
,
ops
::
FSPGradOpKernel
<
phi
::
CPUContext
,
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fsp
,
CPU
,
ALL_LAYOUT
,
ops
::
FSPOpKernel
,
float
,
double
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fsp_grad
,
CPU
,
ALL_LAYOUT
,
ops
::
FSPGradOpKernel
,
float
,
double
)
{}
paddle/fluid/operators/fsp_op.cu
浏览文件 @
eb38c85f
...
...
@@ -16,10 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
fsp
,
ops
::
FSPOpKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FSPOpKernel
<
phi
::
GPUContext
,
double
>
);
REGISTER_OP_CUDA_KERNEL
(
fsp_grad
,
ops
::
FSPGradOpKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FSPGradOpKernel
<
phi
::
GPUContext
,
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fsp
,
GPU
,
ALL_LAYOUT
,
ops
::
FSPOpKernel
,
float
,
double
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fsp_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FSPGradOpKernel
,
float
,
double
)
{}
paddle/fluid/operators/fsp_op.h
浏览文件 @
eb38c85f
...
...
@@ -20,7 +20,7 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FSPOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
@@ -64,7 +64,7 @@ class FSPOpKernel : public framework::OpKernel<T> {
}
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FSPGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc
浏览文件 @
eb38c85f
...
...
@@ -33,9 +33,11 @@ namespace platform = paddle::platform;
namespace
op
=
paddle
::
operators
;
USE_OP_ITSELF
(
batch_norm
);
USE_OP_ITSELF
(
fused_bn_add_activation
);
USE_OP_ITSELF
(
fused_bn_add_activation_grad
);
PD_DECLARE_KERNEL
(
batch_norm
,
GPU
,
ALL_LAYOUT
);
USE_CUDA_ONLY_OP
(
fused_bn_add_activation
);
USE_CUDA_ONLY_OP
(
fused_bn_add_activation_grad
);
PD_DECLARE_KERNEL
(
fused_bn_add_activation
,
GPU
,
ALL_LAYOUT
);
PD_DECLARE_KERNEL
(
fused_bn_add_activation_grad
,
GPU
,
ALL_LAYOUT
);
template
<
typename
T
>
void
InitRandomTensor
(
const
std
::
vector
<
int64_t
>
&
dims
,
...
...
paddle/fluid/operators/fused/fused_attention_op.cu
浏览文件 @
eb38c85f
...
...
@@ -75,7 +75,7 @@ static void AllReduce(phi::DenseTensor &tensor, // NOLINT
#endif
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedAttentionOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -402,7 +402,7 @@ class FusedAttentionOpKernel : public framework::OpKernel<T> {
}
};
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedAttentionGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -826,11 +826,18 @@ class FusedAttentionGradKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
fused_attention
,
ops
::
FusedAttentionOpKernel
<
float
>
,
ops
::
FusedAttentionOpKernel
<
double
>
,
ops
::
FusedAttentionOpKernel
<
plat
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_attention_grad
,
ops
::
FusedAttentionGradKernel
<
float
>
,
ops
::
FusedAttentionGradKernel
<
double
>
,
ops
::
FusedAttentionGradKernel
<
plat
::
float16
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_attention
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedAttentionOpKernel
,
float
,
double
,
plat
::
float16
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_attention_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedAttentionGradKernel
,
float
,
double
,
plat
::
float16
)
{}
paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu
浏览文件 @
eb38c85f
...
...
@@ -25,7 +25,7 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedBiasDropoutResidualLnOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -91,7 +91,7 @@ class FusedBiasDropoutResidualLnOpKernel : public framework::OpKernel<T> {
}
};
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedBiasDropoutResidualLnGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -176,12 +176,18 @@ class FusedBiasDropoutResidualLnGradKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
fused_bias_dropout_residual_layer_norm
,
ops
::
FusedBiasDropoutResidualLnOpKernel
<
float
>
,
ops
::
FusedBiasDropoutResidualLnOpKernel
<
double
>
,
ops
::
FusedBiasDropoutResidualLnOpKernel
<
plat
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_bias_dropout_residual_layer_norm_grad
,
ops
::
FusedBiasDropoutResidualLnGradKernel
<
float
>
,
ops
::
FusedBiasDropoutResidualLnGradKernel
<
double
>
,
ops
::
FusedBiasDropoutResidualLnGradKernel
<
plat
::
float16
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_bias_dropout_residual_layer_norm
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedBiasDropoutResidualLnOpKernel
,
float
,
double
,
plat
::
float16
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_bias_dropout_residual_layer_norm_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedBiasDropoutResidualLnGradKernel
,
float
,
double
,
plat
::
float16
)
{}
paddle/fluid/operators/fused/fused_bn_activation_op.cu
浏览文件 @
eb38c85f
...
...
@@ -36,10 +36,15 @@ template <typename T>
using
BatchNormParamType
=
typename
CudnnDataType
<
T
>::
BatchNormParamType
;
template
<
typename
T
>
class
FusedBatchNormActKernel
<
phi
::
GPUContext
,
T
>
class
FusedBatchNormActKernel
<
T
,
phi
::
GPUContext
>
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
#if CUDNN_VERSION < 7401
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"The fused_batch_norm_act operator is not supported on GPU "
"when CUDNN version < 7.4.1"
));
#endif
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
...
...
@@ -231,10 +236,15 @@ class FusedBatchNormActKernel<phi::GPUContext, T>
};
template
<
typename
T
>
class
FusedBatchNormActGradKernel
<
phi
::
GPUContext
,
T
>
class
FusedBatchNormActGradKernel
<
T
,
phi
::
GPUContext
>
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
#if CUDNN_VERSION < 7401
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"The fused_batch_norm_act operator is not supported on GPU "
"when CUDNN version < 7.4.1"
));
#endif
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
...
...
@@ -415,17 +425,19 @@ class FusedBatchNormActGradKernel<phi::GPUContext, T>
}
// namespace operators
}
// namespace paddle
#if CUDNN_VERSION >= 7401
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
fused_batch_norm_act
,
ops
::
FusedBatchNormActKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FusedBatchNormActKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FusedBatchNormActKernel
<
phi
::
GPUContext
,
plat
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_batch_norm_act_grad
,
ops
::
FusedBatchNormActGradKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FusedBatchNormActGradKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FusedBatchNormActGradKernel
<
phi
::
GPUContext
,
plat
::
float16
>
);
#endif
PD_REGISTER_STRUCT_KERNEL
(
fused_batch_norm_act
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedBatchNormActKernel
,
float
,
double
,
plat
::
float16
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_batch_norm_act_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedBatchNormActGradKernel
,
float
,
double
,
plat
::
float16
)
{}
paddle/fluid/operators/fused/fused_bn_activation_op.h
浏览文件 @
eb38c85f
...
...
@@ -88,13 +88,13 @@ class FusedBatchNormActOpInferVarType
}
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedBatchNormActKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
;
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedBatchNormActGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
;
...
...
paddle/fluid/operators/fused/fused_bn_add_activation_op.cu
浏览文件 @
eb38c85f
...
...
@@ -36,10 +36,15 @@ template <typename T>
using
BatchNormParamType
=
typename
CudnnDataType
<
T
>::
BatchNormParamType
;
template
<
typename
T
>
class
FusedBatchNormAddActKernel
<
phi
::
GPUContext
,
T
>
class
FusedBatchNormAddActKernel
<
T
,
phi
::
GPUContext
>
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
#if CUDNN_VERSION < 7401
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"The fused_bn_add_activation operator is not supported on GPU "
"when CUDNN version < 7.4.1"
));
#endif
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
...
...
@@ -208,10 +213,15 @@ class FusedBatchNormAddActKernel<phi::GPUContext, T>
};
template
<
typename
T
>
class
FusedBatchNormAddActGradKernel
<
phi
::
GPUContext
,
T
>
class
FusedBatchNormAddActGradKernel
<
T
,
phi
::
GPUContext
>
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
#if CUDNN_VERSION < 7401
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"The fused_bn_add_activation operator is not supported on GPU "
"when CUDNN version < 7.4.1"
));
#endif
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
true
,
...
...
@@ -362,13 +372,15 @@ class FusedBatchNormAddActGradKernel<phi::GPUContext, T>
}
// namespace operators
}
// namespace paddle
#if CUDNN_VERSION >= 7401
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
fused_bn_add_activation
,
ops
::
FusedBatchNormAddActKernel
<
phi
::
GPUContext
,
plat
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_bn_add_activation_grad
,
ops
::
FusedBatchNormAddActGradKernel
<
phi
::
GPUContext
,
plat
::
float16
>
);
#endif
PD_REGISTER_STRUCT_KERNEL
(
fused_bn_add_activation
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedBatchNormAddActKernel
,
plat
::
float16
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_bn_add_activation_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedBatchNormAddActGradKernel
,
plat
::
float16
)
{}
paddle/fluid/operators/fused/fused_bn_add_activation_op.h
浏览文件 @
eb38c85f
...
...
@@ -89,13 +89,13 @@ class FusedBatchNormAddActOpInferVarType
}
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedBatchNormAddActKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
;
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedBatchNormAddActGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
;
...
...
paddle/fluid/operators/fused/fused_elemwise_activation_op.cc
浏览文件 @
eb38c85f
...
...
@@ -461,15 +461,19 @@ REGISTER_OPERATOR(
REGISTER_OPERATOR
(
fused_elemwise_activation_grad
,
ops
::
FusedElemwiseActivationOpGrad
);
REGISTER_OP_CPU_KERNEL
(
fused_elemwise_activation
,
ops
::
FusedElemwiseActivationKernel
<
phi
::
CPUContext
,
float
>
,
ops
::
FusedElemwiseActivationKernel
<
phi
::
CPUContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
fused_elemwise_activation_grad
,
ops
::
FusedElemwiseActivationGradKernel
<
phi
::
CPUContext
,
float
>
,
ops
::
FusedElemwiseActivationGradKernel
<
phi
::
CPUContext
,
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_elemwise_activation
,
CPU
,
ALL_LAYOUT
,
ops
::
FusedElemwiseActivationKernel
,
float
,
double
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_elemwise_activation_grad
,
CPU
,
ALL_LAYOUT
,
ops
::
FusedElemwiseActivationGradKernel
,
float
,
double
)
{}
// for memory optimization, we register the fused_elemwise_add_activation OP
REGISTER_OPERATOR
(
...
...
@@ -482,12 +486,16 @@ REGISTER_OPERATOR(fused_elemwise_add_activation_grad,
ops
::
FusedElemwiseAddActivationNoNeddBufVarInferer
,
ops
::
FusedElemwiseAddActivationOpGrad
);
REGISTER_OP_CPU_KERNEL
(
fused_elemwise_add_activation
,
ops
::
FusedElemwiseActivationKernel
<
phi
::
CPUContext
,
float
>
,
ops
::
FusedElemwiseActivationKernel
<
phi
::
CPUContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
fused_elemwise_add_activation_grad
,
ops
::
FusedElemwiseActivationGradKernel
<
phi
::
CPUContext
,
float
>
,
ops
::
FusedElemwiseActivationGradKernel
<
phi
::
CPUContext
,
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_elemwise_add_activation
,
CPU
,
ALL_LAYOUT
,
ops
::
FusedElemwiseAddActivationKernel
,
float
,
double
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_elemwise_add_activation_grad
,
CPU
,
ALL_LAYOUT
,
ops
::
FusedElemwiseAddActivationGradKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused/fused_elemwise_activation_op.cu
浏览文件 @
eb38c85f
...
...
@@ -15,30 +15,34 @@ limitations under the License. */
#include "paddle/fluid/operators/fused/fused_elemwise_activation_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
fused_elemwise_activation
,
ops
::
FusedElemwiseActivationKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FusedElemwiseActivationKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FusedElemwiseActivationKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_elemwise_activation_grad
,
ops
::
FusedElemwiseActivationGradKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FusedElemwiseActivationGradKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FusedElemwiseActivationGradKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_elemwise_add_activation
,
ops
::
FusedElemwiseActivationKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FusedElemwiseActivationKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FusedElemwiseActivationKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_elemwise_add_activation_grad
,
ops
::
FusedElemwiseActivationGradKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FusedElemwiseActivationGradKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FusedElemwiseActivationGradKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
);
namespace
plat
=
paddle
::
platform
;
PD_REGISTER_STRUCT_KERNEL
(
fused_elemwise_activation
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedElemwiseActivationKernel
,
float
,
double
,
plat
::
float16
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_elemwise_activation_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedElemwiseActivationGradKernel
,
float
,
double
,
plat
::
float16
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_elemwise_add_activation
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedElemwiseAddActivationKernel
,
float
,
double
,
plat
::
float16
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_elemwise_add_activation_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedElemwiseAddActivationGradKernel
,
float
,
double
,
plat
::
float16
)
{}
paddle/fluid/operators/fused/fused_elemwise_activation_op.h
浏览文件 @
eb38c85f
...
...
@@ -616,7 +616,7 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx,
}
}
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedElemwiseActivationKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -655,7 +655,7 @@ class FusedElemwiseActivationKernel : public framework::OpKernel<T> {
}
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedElemwiseActivationGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -765,5 +765,14 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel<T> {
}
}
};
template
<
typename
T
,
typename
DeviceContext
>
class
FusedElemwiseAddActivationKernel
:
public
FusedElemwiseActivationKernel
<
T
,
DeviceContext
>
{};
template
<
typename
T
,
typename
DeviceContext
>
class
FusedElemwiseAddActivationGradKernel
:
public
FusedElemwiseActivationGradKernel
<
T
,
DeviceContext
>
{};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu
浏览文件 @
eb38c85f
...
...
@@ -29,7 +29,7 @@
namespace
paddle
{
namespace
operators
{
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
EmbeddingEltWiseLayerNormKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
@@ -145,14 +145,18 @@ class EmbeddingEltWiseLayerNormKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
#if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 10000
REGISTER_OP_CUDA_KERNEL
(
fused_embedding_eltwise_layernorm
,
ops
::
EmbeddingEltWiseLayerNormKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
EmbeddingEltWiseLayerNormKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_embedding_eltwise_layernorm
,
GPU
,
ALL_LAYOUT
,
ops
::
EmbeddingEltWiseLayerNormKernel
,
float
,
plat
::
float16
)
{}
#else
REGISTER_OP_CUDA_KERNEL
(
fused_embedding_eltwise_layernorm
,
ops
::
EmbeddingEltWiseLayerNormKernel
<
phi
::
GPUContext
,
float
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_embedding_eltwise_layernorm
,
GPU
,
ALL_LAYOUT
,
ops
::
EmbeddingEltWiseLayerNormKernel
,
float
)
{}
#endif
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
浏览文件 @
eb38c85f
...
...
@@ -270,7 +270,7 @@ This operator fuse the X into LSTM, more details can refer to LSTM op.
)DOC"
);
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedEmbeddingFCLSTMKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
#define INIT_VEC_FUNC \
...
...
@@ -396,7 +396,6 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
GET_Ht(ct, gates, ht)
void
SeqCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
using
DeviceContext
=
phi
::
CPUContext
;
INIT_BASE_INPUT_OUTPUT
INIT_BASE_SIZES
INIT_VEC_FUNC
...
...
@@ -502,7 +501,6 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
}
void
BatchCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
using
DeviceContext
=
phi
::
CPUContext
;
INIT_BASE_INPUT_OUTPUT
if
(
ids
->
lod
()[
0
].
size
()
==
2
)
{
SeqCompute
(
ctx
);
...
...
@@ -682,6 +680,9 @@ REGISTER_OPERATOR(fused_embedding_fc_lstm,
ops
::
FusedEmbeddingFCLSTMOp
,
ops
::
FusedEmbeddingFCLSTMOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fused_embedding_fc_lstm
,
ops
::
FusedEmbeddingFCLSTMKernel
<
float
>
,
ops
::
FusedEmbeddingFCLSTMKernel
<
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_embedding_fc_lstm
,
CPU
,
ALL_LAYOUT
,
ops
::
FusedEmbeddingFCLSTMKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
浏览文件 @
eb38c85f
...
...
@@ -201,9 +201,15 @@ REGISTER_OPERATOR(fused_embedding_seq_pool_grad,
ops
::
FusedEmbeddingSeqPoolOpGrad
,
ops
::
FusedEmbeddingSeqPoolOpGradVarTypeInference
);
REGISTER_OP_CPU_KERNEL
(
fused_embedding_seq_pool
,
ops
::
FusedEmbeddingSeqPoolKernel
<
float
>
,
ops
::
FusedEmbeddingSeqPoolKernel
<
double
>
);
REGISTER_OP_CPU_KERNEL
(
fused_embedding_seq_pool_grad
,
ops
::
FusedEmbeddingSeqPoolGradKernel
<
float
>
,
ops
::
FusedEmbeddingSeqPoolGradKernel
<
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_embedding_seq_pool
,
CPU
,
ALL_LAYOUT
,
ops
::
FusedEmbeddingSeqPoolKernel
,
float
,
double
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_embedding_seq_pool_grad
,
CPU
,
ALL_LAYOUT
,
ops
::
FusedEmbeddingSeqPoolGradKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
浏览文件 @
eb38c85f
...
...
@@ -135,7 +135,7 @@ inline int FusedEmbeddingSeqPoolLastDim(const framework::DDim &table_dims,
return
last_dim
;
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedEmbeddingSeqPoolKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
@@ -224,7 +224,7 @@ class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> {
}
};
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedEmbeddingSeqPoolGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu
浏览文件 @
eb38c85f
...
...
@@ -374,7 +374,7 @@ void AddReluAddLayerNorm(gpuStream_t stream,
}
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedFCElementwiseLayerNormOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -449,8 +449,12 @@ class FusedFCElementwiseLayerNormOpKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
fused_fc_elementwise_layernorm
,
ops
::
FusedFCElementwiseLayerNormOpKernel
<
phi
::
dtype
::
float16
>
,
ops
::
FusedFCElementwiseLayerNormOpKernel
<
float
>
,
ops
::
FusedFCElementwiseLayerNormOpKernel
<
double
>
);
namespace
plat
=
paddle
::
platform
;
PD_REGISTER_STRUCT_KERNEL
(
fused_fc_elementwise_layernorm
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedFCElementwiseLayerNormOpKernel
,
float
,
double
,
plat
::
float16
)
{}
paddle/fluid/operators/fused/fused_feedforward_op.cu
浏览文件 @
eb38c85f
...
...
@@ -65,7 +65,7 @@ static void AllReduce(phi::DenseTensor& tensor, // NOLINT
#endif
}
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedFeedForwardKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
MatMul
(
const
phi
::
GPUContext
&
ctx
,
...
...
@@ -301,7 +301,7 @@ class FusedFeedForwardKernel : public framework::OpKernel<T> {
}
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedFeedForwardGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
MatMulGrad
(
const
phi
::
GPUContext
&
ctx
,
...
...
@@ -628,14 +628,19 @@ class FusedFeedForwardGradKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
fused_feedforward
,
ops
::
FusedFeedForwardKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FusedFeedForwardKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FusedFeedForwardKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_feedforward_grad
,
ops
::
FusedFeedForwardGradKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FusedFeedForwardGradKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FusedFeedForwardGradKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
);
namespace
plat
=
paddle
::
platform
;
PD_REGISTER_STRUCT_KERNEL
(
fused_feedforward
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedFeedForwardKernel
,
float
,
double
,
plat
::
float16
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_feedforward_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedFeedForwardGradKernel
,
float
,
double
,
plat
::
float16
)
{}
paddle/fluid/operators/fused/fused_gate_attention_op.cu
浏览文件 @
eb38c85f
...
...
@@ -354,7 +354,7 @@ void ComputeOutputLinearBackward(const framework::ExecutionContext &ctx,
use_fused_matmul_bias
);
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedGateAttentionOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -446,7 +446,7 @@ class FusedGateAttentionOpKernel : public framework::OpKernel<T> {
}
};
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedGateAttentionGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -565,23 +565,35 @@ class FusedGateAttentionGradKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
#ifdef PADDLE_WITH_HIP
REGISTER_OP_CUDA_KERNEL
(
fused_gate_attention
,
ops
::
FusedGateAttentionOpKernel
<
float
>
,
ops
::
FusedGateAttentionOpKernel
<
plat
::
float16
>
,
ops
::
FusedGateAttentionOpKernel
<
plat
::
bfloat16
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_gate_attention_grad
,
ops
::
FusedGateAttentionGradKernel
<
float
>
,
ops
::
FusedGateAttentionGradKernel
<
plat
::
float16
>
,
ops
::
FusedGateAttentionGradKernel
<
plat
::
bfloat16
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_gate_attention
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedGateAttentionOpKernel
,
float
,
plat
::
float16
,
plat
::
bfloat16
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_gate_attention_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedGateAttentionGradKernel
,
float
,
plat
::
float16
,
plat
::
bfloat16
)
{}
#else
REGISTER_OP_CUDA_KERNEL
(
fused_gate_attention
,
ops
::
FusedGateAttentionOpKernel
<
float
>
,
ops
::
FusedGateAttentionOpKernel
<
double
>
,
ops
::
FusedGateAttentionOpKernel
<
plat
::
float16
>
,
ops
::
FusedGateAttentionOpKernel
<
plat
::
bfloat16
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_gate_attention_grad
,
ops
::
FusedGateAttentionGradKernel
<
float
>
,
ops
::
FusedGateAttentionGradKernel
<
double
>
,
ops
::
FusedGateAttentionGradKernel
<
plat
::
float16
>
,
ops
::
FusedGateAttentionGradKernel
<
plat
::
bfloat16
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_gate_attention
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedGateAttentionOpKernel
,
float
,
double
,
plat
::
float16
,
plat
::
bfloat16
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_gate_attention_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedGateAttentionGradKernel
,
float
,
double
,
plat
::
float16
,
plat
::
bfloat16
)
{}
#endif
paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu
浏览文件 @
eb38c85f
...
...
@@ -61,10 +61,15 @@ phi::funcs::MatmulFusedType GetFwdFusedEpilogueType(
return
fused_type
;
}
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedGemmEpilogueKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
#if CUDA_VERSION < 11060
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"The fused_gemm_epilogue operator only support CUDA 11.6 "
"or higher version."
));
#endif
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
GPUContext
>();
const
phi
::
DenseTensor
*
x
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"X"
);
...
...
@@ -119,10 +124,15 @@ class FusedGemmEpilogueKernel : public framework::OpKernel<T> {
}
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedGemmEpilogueGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
#if CUDA_VERSION < 11060
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"The fused_gemm_epilogue operator only support CUDA 11.6 "
"or higher version."
));
#endif
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
GPUContext
>();
const
phi
::
DenseTensor
*
dout
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"DOut"
);
...
...
@@ -172,21 +182,21 @@ class FusedGemmEpilogueGradKernel : public framework::OpKernel<T> {
}
// namespace operators
}
// namespace paddle
#if CUDA_VERSION >= 11060
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
fused_gemm_epilogue
,
ops
::
FusedGemmEpilogueKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FusedGemmEpilogueKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FusedGemmEpilogueKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
,
ops
::
FusedGemmEpilogueKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
bfloat16
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_gemm_epilogue_grad
,
ops
::
FusedGemmEpilogueGradKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FusedGemmEpilogueGradKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FusedGemmEpilogueGradKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
float16
>
,
ops
::
FusedGemmEpilogueGradKernel
<
phi
::
GPUContext
,
paddle
::
platform
::
bfloat16
>
);
#endif
namespace
plat
=
paddle
::
platform
;
PD_REGISTER_STRUCT_KERNEL
(
fused_gemm_epilogue
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedGemmEpilogueKernel
,
float
,
double
,
plat
::
float16
,
plat
::
bfloat16
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_gemm_epilogue_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedGemmEpilogueGradKernel
,
float
,
double
,
plat
::
float16
,
plat
::
bfloat16
)
{}
paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu
浏览文件 @
eb38c85f
...
...
@@ -18,7 +18,7 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedMultiTransformerINT8OpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -662,6 +662,9 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
fused_multi_transformer_int8
,
ops
::
FusedMultiTransformerINT8OpKernel
<
plat
::
float16
>
,
ops
::
FusedMultiTransformerINT8OpKernel
<
float
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_multi_transformer_int8
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedMultiTransformerINT8OpKernel
,
float
,
plat
::
float16
)
{}
paddle/fluid/operators/fused/fused_multi_transformer_op.cu
浏览文件 @
eb38c85f
...
...
@@ -19,7 +19,7 @@ namespace operators {
#if CUDA_VERSION >= 11060 // Use cublasLt to fuse FFN operation.
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedMultiTransformerOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -685,7 +685,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel<T> {
#else
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedMultiTransformerOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -1370,6 +1370,9 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
fused_multi_transformer
,
ops
::
FusedMultiTransformerOpKernel
<
plat
::
float16
>
,
ops
::
FusedMultiTransformerOpKernel
<
float
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_multi_transformer
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedMultiTransformerOpKernel
,
float
,
plat
::
float16
)
{}
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
浏览文件 @
eb38c85f
...
...
@@ -290,7 +290,13 @@ REGISTER_OPERATOR(fused_seqpool_cvm,
ops
::
FusedSeqpoolCVMGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OPERATOR
(
fused_seqpool_cvm_grad
,
ops
::
FusedSeqpoolCVMGradOp
)
REGISTER_OP_CPU_KERNEL
(
fused_seqpool_cvm
,
ops
::
FusedSeqpoolCVMOpCPUKernel
<
float
>
)
REGISTER_OP_CPU_KERNEL
(
fused_seqpool_cvm_grad
,
ops
::
FusedSeqpoolCVMGradOpCPUKernel
<
float
>
)
PD_REGISTER_STRUCT_KERNEL
(
fused_seqpool_cvm
,
CPU
,
ALL_LAYOUT
,
ops
::
FusedSeqpoolCVMOpCPUKernel
,
float
)
{}
PD_REGISTER_STRUCT_KERNEL
(
fused_seqpool_cvm_grad
,
CPU
,
ALL_LAYOUT
,
ops
::
FusedSeqpoolCVMGradOpCPUKernel
,
float
)
{}
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
浏览文件 @
eb38c85f
...
...
@@ -420,7 +420,7 @@ void FusedSeqpoolCVMGrad(const framework::ExecutionContext &ctx,
}
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedSeqpoolCVMCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -505,7 +505,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
}
};
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedSeqpoolCVMGradCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -588,8 +588,11 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
fused_seqpool_cvm
,
ops
::
FusedSeqpoolCVMCUDAKernel
<
float
>
);
REGISTER_OP_CUDA_KERNEL
(
fused_seqpool_cvm_grad
,
ops
::
FusedSeqpoolCVMGradCUDAKernel
<
float
>
);
PD_REGISTER_STRUCT_KERNEL
(
fused_seqpool_cvm
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedSeqpoolCVMCUDAKernel
,
float
)
{
}
PD_REGISTER_STRUCT_KERNEL
(
fused_seqpool_cvm_grad
,
GPU
,
ALL_LAYOUT
,
ops
::
FusedSeqpoolCVMGradCUDAKernel
,
float
)
{}
paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
浏览文件 @
eb38c85f
...
...
@@ -23,7 +23,7 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedSeqpoolCVMOpCPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -33,7 +33,7 @@ class FusedSeqpoolCVMOpCPUKernel : public framework::OpKernel<T> {
}
};
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedSeqpoolCVMGradOpCPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
paddle/fluid/operators/fused/fusion_conv_inception_op.cu
浏览文件 @
eb38c85f
...
...
@@ -34,10 +34,15 @@ using ScalingParamType = typename platform::CudnnDataType<T>::ScalingParamType;
template
<
typename
T
>
using
CudnnDataType
=
platform
::
CudnnDataType
<
T
>
;
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
CUDNNConvInceptionFusionOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
#if CUDNN_VERSION < 7100
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"The conv2d_inception_fusion operator is not supported on GPU "
"when CUDNN version < 7.1.0"
));
#endif
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
GPUContext
>();
auto
*
input
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Input"
);
auto
filters
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Filter"
);
...
...
@@ -336,9 +341,10 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel<T> {
}
// namespace operators
}
// namespace paddle
#if CUDNN_VERSION >= 7100
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
conv2d_inception_fusion
,
ops
::
CUDNNConvInceptionFusionOpKernel
<
float
>
,
ops
::
CUDNNConvInceptionFusionOpKernel
<
double
>
);
#endif
PD_REGISTER_STRUCT_KERNEL
(
conv2d_inception_fusion
,
GPU
,
ALL_LAYOUT
,
ops
::
CUDNNConvInceptionFusionOpKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused/fusion_group_op.cu.cc
浏览文件 @
eb38c85f
...
...
@@ -18,7 +18,10 @@ limitations under the License. */
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
fusion_group
,
ops
::
FusionGroupKernel
<
phi
::
GPUContext
,
float
>
,
ops
::
FusionGroupKernel
<
phi
::
GPUContext
,
double
>
,
ops
::
FusionGroupKernel
<
phi
::
GPUContext
,
plat
::
float16
>
);
PD_REGISTER_STRUCT_KERNEL
(
fusion_group
,
GPU
,
ALL_LAYOUT
,
ops
::
FusionGroupKernel
,
float
,
double
,
plat
::
float16
)
{}
paddle/fluid/operators/fused/fusion_group_op.h
浏览文件 @
eb38c85f
...
...
@@ -42,7 +42,7 @@ static void MutableMultiTypeData(std::vector<phi::DenseTensor*>* var,
}
}
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusionGroupKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
paddle/fluid/operators/fused/fusion_group_op_test.cc
浏览文件 @
eb38c85f
...
...
@@ -234,4 +234,5 @@ void elementwise_cuda_kernel_0(size_t n, float *x, float* y, float* z) {
}
// namespace operators
}
// namespace paddle
USE_CUDA_ONLY_OP
(
fusion_group
);
USE_OP_ITSELF
(
fusion_group
);
PD_DECLARE_KERNEL
(
fusion_group
,
GPU
,
ALL_LAYOUT
);
paddle/fluid/operators/fused/fusion_gru_op.cc
浏览文件 @
eb38c85f
...
...
@@ -249,7 +249,7 @@ more details can refer to GRU op.
)DOC"
);
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusionGRUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -303,7 +303,6 @@ class FusionGRUKernel : public framework::OpKernel<T> {
T* xx_data = xx->mutable_data<T>(place)
void
SeqCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
using
DeviceContext
=
phi
::
CPUContext
;
INIT_BASE_DEFINES
;
INIT_OTHER_DEFINES
;
const
int
N
=
x_lod
[
0
].
size
()
-
1
;
...
...
@@ -394,7 +393,6 @@ class FusionGRUKernel : public framework::OpKernel<T> {
}
void
BatchCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
using
DeviceContext
=
phi
::
CPUContext
;
INIT_BASE_DEFINES
;
if
(
x_lod
[
0
].
size
()
==
2
)
{
xx
->
Resize
({
total_T
,
D3
});
...
...
@@ -551,9 +549,8 @@ class FusionGRUKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
fusion_gru
,
ops
::
FusionGRUOp
,
ops
::
FusionGRUOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fusion_gru
,
ops
::
FusionGRUKernel
<
float
>
,
ops
::
FusionGRUKernel
<
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fusion_gru
,
CPU
,
ALL_LAYOUT
,
ops
::
FusionGRUKernel
,
float
,
double
)
{}
/* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION
(
fusion_gru
)
...
...
paddle/fluid/operators/fused/fusion_lstm_op.cc
浏览文件 @
eb38c85f
...
...
@@ -298,11 +298,10 @@ This operator fuse the X into LSTM, more details can refer to LSTM op.
)DOC"
);
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FuisonLSTMKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
#define INIT_BASE_DEFINES \
using DeviceContext = phi::CPUContext; \
auto* x = ctx.Input<phi::DenseTensor>("X"); \
auto* h0 = ctx.Input<phi::DenseTensor>("H0"); \
auto* c0 = ctx.Input<phi::DenseTensor>("C0"); \
...
...
@@ -580,6 +579,5 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
fusion_lstm
,
ops
::
FusionLSTMOp
,
ops
::
FusionLSTMOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fusion_lstm
,
ops
::
FuisonLSTMKernel
<
float
>
,
ops
::
FuisonLSTMKernel
<
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fusion_lstm
,
CPU
,
ALL_LAYOUT
,
ops
::
FuisonLSTMKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
浏览文件 @
eb38c85f
...
...
@@ -141,7 +141,7 @@ static void fc_relu(const T* x,
}
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusionRepeatedFCReluKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -201,6 +201,9 @@ REGISTER_OPERATOR(fusion_repeated_fc_relu,
ops
::
FusionRepeatedFCReluOp
,
ops
::
FusionRepeatedFCReluOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fusion_repeated_fc_relu
,
ops
::
FusionRepeatedFCReluKernel
<
float
>
,
ops
::
FusionRepeatedFCReluKernel
<
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fusion_repeated_fc_relu
,
CPU
,
ALL_LAYOUT
,
ops
::
FusionRepeatedFCReluKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
浏览文件 @
eb38c85f
...
...
@@ -148,11 +148,10 @@ Fusion Sequence Conv and ElementwiseAdd Operator.
)DOC"
);
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusionSeqConvEltAddReluKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
using
DeviceContext
=
phi
::
CPUContext
;
auto
*
x
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Filter"
);
auto
*
b
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Bias"
);
...
...
@@ -283,6 +282,9 @@ REGISTER_OPERATOR(fusion_seqconv_eltadd_relu,
ops
::
FusionSeqConvEltAddReluOp
,
ops
::
FusionSeqConvEltAddReluOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fusion_seqconv_eltadd_relu
,
ops
::
FusionSeqConvEltAddReluKernel
<
float
>
,
ops
::
FusionSeqConvEltAddReluKernel
<
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fusion_seqconv_eltadd_relu
,
CPU
,
ALL_LAYOUT
,
ops
::
FusionSeqConvEltAddReluKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
浏览文件 @
eb38c85f
...
...
@@ -147,11 +147,10 @@ The concat axis should be 1.
)DOC"
);
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusionSeqExpandConcatFCOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
using
DeviceContext
=
phi
::
CPUContext
;
auto
ins
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"FCWeight"
);
auto
*
b
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"FCBias"
);
...
...
@@ -295,6 +294,9 @@ REGISTER_OPERATOR(fusion_seqexpand_concat_fc,
ops
::
FusionSeqExpandConcatFCOp
,
ops
::
FusionSeqExpandConcatFCOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fusion_seqexpand_concat_fc
,
ops
::
FusionSeqExpandConcatFCOpKernel
<
float
>
,
ops
::
FusionSeqExpandConcatFCOpKernel
<
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fusion_seqexpand_concat_fc
,
CPU
,
ALL_LAYOUT
,
ops
::
FusionSeqExpandConcatFCOpKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
浏览文件 @
eb38c85f
...
...
@@ -92,7 +92,7 @@ Fusion Sequence Pool of pooltype(sum, average and sqrt) and Concat Operator.
)DOC"
);
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusionSeqPoolConcatKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -173,6 +173,9 @@ REGISTER_OPERATOR(fusion_seqpool_concat,
ops
::
FusionSeqPoolConcatOp
,
ops
::
FusionSeqPoolConcatOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fusion_seqpool_concat
,
ops
::
FusionSeqPoolConcatKernel
<
float
>
,
ops
::
FusionSeqPoolConcatKernel
<
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fusion_seqpool_concat
,
CPU
,
ALL_LAYOUT
,
ops
::
FusionSeqPoolConcatKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
浏览文件 @
eb38c85f
...
...
@@ -96,7 +96,7 @@ Fusion Sequence Pool of pooltype(sum, average and sqrt) and Concat Operator.
)DOC"
);
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusionSeqPoolCVMConcatKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -172,6 +172,9 @@ REGISTER_OPERATOR(
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OP_CPU_KERNEL
(
fusion_seqpool_cvm_concat
,
ops
::
FusionSeqPoolCVMConcatKernel
<
float
>
,
ops
::
FusionSeqPoolCVMConcatKernel
<
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fusion_seqpool_cvm_concat
,
CPU
,
ALL_LAYOUT
,
ops
::
FusionSeqPoolCVMConcatKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc
浏览文件 @
eb38c85f
...
...
@@ -84,7 +84,7 @@ void FusionSquaredMatSubOpMaker::Make() {
)DOC"
);
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusionSquaredMatSubKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -151,6 +151,9 @@ REGISTER_OPERATOR(fusion_squared_mat_sub,
ops
::
FusionSquaredMatSubOp
,
ops
::
FusionSquaredMatSubOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fusion_squared_mat_sub
,
ops
::
FusionSquaredMatSubKernel
<
float
>
,
ops
::
FusionSquaredMatSubKernel
<
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fusion_squared_mat_sub
,
CPU
,
ALL_LAYOUT
,
ops
::
FusionSquaredMatSubKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc
浏览文件 @
eb38c85f
...
...
@@ -24,7 +24,7 @@ namespace operators {
template
<
typename
T
>
using
CudnnDataType
=
platform
::
CudnnDataType
<
T
>
;
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
TransposeFlattenConcatFusionKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
@@ -119,6 +119,10 @@ class TransposeFlattenConcatFusionKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
fusion_transpose_flatten_concat
,
ops
::
TransposeFlattenConcatFusionKernel
<
float
>
,
ops
::
TransposeFlattenConcatFusionKernel
<
double
>
);
PD_REGISTER_STRUCT_KERNEL
(
fusion_transpose_flatten_concat
,
GPU
,
ALL_LAYOUT
,
ops
::
TransposeFlattenConcatFusionKernel
,
float
,
double
)
{}
paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc
浏览文件 @
eb38c85f
...
...
@@ -102,7 +102,10 @@ REGISTER_OPERATOR(
ops
::
SoftmaxMaskFuseUpperTriangleGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
// Register the gradient operator for fused_softmax_mask_upper_triangle.
REGISTER_OPERATOR(fused_softmax_mask_upper_triangle_grad,
                  ops::SoftmaxMaskFuseUpperTriangleOpGrad);

// Legacy fluid-style CPU kernel registration (CPUContext + float/double
// template arguments). Replaced in this commit by the phi-style
// PD_REGISTER_STRUCT_KERNEL registration of the same kernel.
REGISTER_OP_CPU_KERNEL(
    fused_softmax_mask_upper_triangle,
    ops::SoftmaxMaskFuseUpperTriangleCPUKernel<phi::CPUContext, float>,
    ops::SoftmaxMaskFuseUpperTriangleCPUKernel<phi::CPUContext, double>);
// phi-style registration of the fused_softmax_mask_upper_triangle CPU kernel
// for float and double; the DeviceContext is now selected by the registrar
// (CPU backend) rather than hard-coded as a template argument.
PD_REGISTER_STRUCT_KERNEL(fused_softmax_mask_upper_triangle,
                          CPU,
                          ALL_LAYOUT,
                          ops::SoftmaxMaskFuseUpperTriangleCPUKernel,
                          float,
                          double) {}
paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu
浏览文件 @
eb38c85f
...
...
@@ -354,7 +354,7 @@ __global__ void SoftmaxMaskFuseUpperTriangleGradGPUKernel(const T* grad_input,
}
}
template
<
typename
Place
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
SoftmaxMaskFuseUpperTriangleKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
@@ -386,7 +386,8 @@ class SoftmaxMaskFuseUpperTriangleKernel : public framework::OpKernel<T> {
"received the last dimension of x is %d"
,
key_seq_len
));
auto
&
place
=
*
context
.
template
device_context
<
Place
>().
eigen_device
();
auto
&
place
=
*
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
stream
=
context
.
cuda_device_context
().
stream
();
int
pow2_index
=
get_pow2_index_value
(
key_seq_len
);
...
...
@@ -470,7 +471,7 @@ class SoftmaxMaskFuseUpperTriangleKernel : public framework::OpKernel<T> {
}
};
template
<
typename
Place
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
SoftmaxMaskFuseUpperTriangleGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
@@ -491,7 +492,8 @@ class SoftmaxMaskFuseUpperTriangleGradKernel : public framework::OpKernel<T> {
auto
query_seq_len
=
y_dim
[
2
];
auto
key_seq_len
=
y_dim
[
3
];
auto
&
place
=
*
context
.
template
device_context
<
Place
>().
eigen_device
();
auto
&
place
=
*
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
stream
=
context
.
cuda_device_context
().
stream
();
int
pow2_index
=
get_pow2_index_value
(
key_seq_len
);
...
...
@@ -602,14 +604,18 @@ class SoftmaxMaskFuseUpperTriangleGradKernel : public framework::OpKernel<T> {
// Short aliases used by the registration macros below.
namespace ops = paddle::operators;
namespace plat = paddle::platform;

// Legacy fluid-style CUDA registration of the forward kernel for fp16,
// bf16 and float. Replaced in this commit by PD_REGISTER_STRUCT_KERNEL.
REGISTER_OP_CUDA_KERNEL(
    fused_softmax_mask_upper_triangle,
    ops::SoftmaxMaskFuseUpperTriangleKernel<phi::GPUContext, plat::float16>,
    ops::SoftmaxMaskFuseUpperTriangleKernel<phi::GPUContext, plat::bfloat16>,
    ops::SoftmaxMaskFuseUpperTriangleKernel<phi::GPUContext, float>);
// Legacy fluid-style CUDA registration of the backward kernel for fp16,
// bf16 and float. Replaced in this commit by PD_REGISTER_STRUCT_KERNEL.
REGISTER_OP_CUDA_KERNEL(
    fused_softmax_mask_upper_triangle_grad,
    ops::SoftmaxMaskFuseUpperTriangleGradKernel<phi::GPUContext,
                                                plat::float16>,
    ops::SoftmaxMaskFuseUpperTriangleGradKernel<phi::GPUContext,
                                                plat::bfloat16>,
    ops::SoftmaxMaskFuseUpperTriangleGradKernel<phi::GPUContext, float>);
// phi-style GPU registration of the forward kernel: same dtype set as the
// legacy macro (float, fp16, bf16); the device context is supplied by the
// registrar. Trailing {} is the empty body the macro expects.
PD_REGISTER_STRUCT_KERNEL(fused_softmax_mask_upper_triangle,
                          GPU,
                          ALL_LAYOUT,
                          ops::SoftmaxMaskFuseUpperTriangleKernel,
                          float,
                          plat::float16,
                          plat::bfloat16) {}
// phi-style GPU registration of the backward kernel, mirroring the forward
// registration's dtype list (float, fp16, bf16).
PD_REGISTER_STRUCT_KERNEL(fused_softmax_mask_upper_triangle_grad,
                          GPU,
                          ALL_LAYOUT,
                          ops::SoftmaxMaskFuseUpperTriangleGradKernel,
                          float,
                          plat::float16,
                          plat::bfloat16) {}
paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.h
浏览文件 @
eb38c85f
...
...
@@ -17,7 +17,7 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
SoftmaxMaskFuseUpperTriangleCPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
paddle/fluid/operators/fused_token_prune_op.cu
浏览文件 @
eb38c85f
...
...
@@ -79,7 +79,7 @@ __global__ void MaximumFirst(T* mat, int num_raws, int num_cols, T max_value) {
}
}
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FusedTokenPruneOpCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
@@ -283,6 +283,9 @@ class FusedTokenPruneOpCUDAKernel : public framework::OpKernel<T> {
}
// namespace paddle
// Short alias used by the registration macros below.
namespace ops = paddle::operators;

// Legacy fluid-style CUDA kernel registration for fused_token_prune
// (float/double). Replaced in this commit by PD_REGISTER_STRUCT_KERNEL.
REGISTER_OP_CUDA_KERNEL(fused_token_prune,
                        ops::FusedTokenPruneOpCUDAKernel<float>,
                        ops::FusedTokenPruneOpCUDAKernel<double>);
// phi-style registration of the fused_token_prune GPU kernel for float and
// double. Trailing {} is the empty body the macro expects.
PD_REGISTER_STRUCT_KERNEL(fused_token_prune,
                          GPU,
                          ALL_LAYOUT,
                          ops::FusedTokenPruneOpCUDAKernel,
                          float,
                          double) {}
paddle/fluid/operators/optimizers/ftrl_op.cc
浏览文件 @
eb38c85f
...
...
@@ -156,4 +156,5 @@ The paper that proposed Follow The Regularized Leader (FTRL):
// Short alias used by the registration macros below.
namespace ops = paddle::operators;

// FTRL optimizer op has no gradient op (it is an optimizer update rule).
REGISTER_OP_WITHOUT_GRADIENT(ftrl, ops::FTRLOp, ops::FTRLOpMaker);

// Legacy fluid-style CPU kernel registration (float only). Replaced in this
// commit by the phi-style PD_REGISTER_STRUCT_KERNEL registration.
REGISTER_OP_CPU_KERNEL(ftrl, ops::FTRLOpKernel<phi::CPUContext, float>);
// phi-style registration of the FTRL CPU kernel (float only, matching the
// legacy registration it replaces).
PD_REGISTER_STRUCT_KERNEL(ftrl, CPU, ALL_LAYOUT, ops::FTRLOpKernel, float) {}
paddle/fluid/operators/optimizers/ftrl_op.cu
浏览文件 @
eb38c85f
...
...
@@ -13,4 +13,4 @@ specific language governing permissions and limitations under the License. */
// Kernel definition shared between CPU and GPU registrations.
#include "paddle/fluid/operators/optimizers/ftrl_op.h"

// Short alias used by the registration macro below.
namespace ops = paddle::operators;

// Legacy fluid-style CUDA kernel registration (float only). Replaced in this
// commit by the phi-style PD_REGISTER_STRUCT_KERNEL registration.
REGISTER_OP_CUDA_KERNEL(ftrl, ops::FTRLOpKernel<phi::GPUContext, float>);
// phi-style registration of the FTRL GPU kernel (float only, matching the
// legacy registration it replaces).
PD_REGISTER_STRUCT_KERNEL(ftrl, GPU, ALL_LAYOUT, ops::FTRLOpKernel, float) {}
paddle/fluid/operators/optimizers/ftrl_op.h
浏览文件 @
eb38c85f
...
...
@@ -113,7 +113,7 @@ class SparseFTRLFunctor {
}
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FTRLOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
paddle/fluid/operators/string/faster_tokenizer_op.cc
浏览文件 @
eb38c85f
...
...
@@ -541,4 +541,5 @@ REGISTER_OPERATOR(faster_tokenizer,
ops
::
FasterTokenizerOp
,
ops
::
FasterTokenizerOpMaker
);
// Legacy fluid-style CPU kernel registration for faster_tokenizer
// (int64_t token ids only). Replaced in this commit by the phi-style
// PD_REGISTER_STRUCT_KERNEL registration.
REGISTER_OP_CPU_KERNEL(faster_tokenizer, ops::FasterTokenizerKernel<int64_t>);
// phi-style registration of the faster_tokenizer CPU kernel (int64_t only,
// matching the legacy registration it replaces).
PD_REGISTER_STRUCT_KERNEL(
    faster_tokenizer, CPU, ALL_LAYOUT, ops::FasterTokenizerKernel, int64_t) {}
paddle/fluid/operators/string/faster_tokenizer_op.h
浏览文件 @
eb38c85f
...
...
@@ -122,7 +122,7 @@ class BertTokenizer {
InvVocab
inv_vocab_
;
};
template
<
typename
T
>
template
<
typename
T
,
typename
DeviceContext
>
class
FasterTokenizerKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录