Unverified commit 1c12f2f3, authored by huangjiyi and committed by GitHub

Register fluid kernels to phi [part 10] (#53034)

* update

* update

* Revert "update"

* fix bug

* update
Parent 20a66bbf
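Every hunk below applies the same two-part pattern: kernel registrations move from the legacy fluid macros (REGISTER_OP_CPU_KERNEL / REGISTER_OP_CUDA_KERNEL) to phi's PD_REGISTER_STRUCT_KERNEL, and the kernel class templates swap their parameter order from <DeviceContext, T> to <T, DeviceContext>. Below is a minimal before/after sketch of the pattern, using a hypothetical my_op and ops::MyOpKernel rather than any operator from this PR; it builds only inside the Paddle source tree, and the claim that the macro instantiates the class as <dtype, DeviceContext> is inferred from the diff, not from the macro's definition.

// The kernel class: the device context moves to the second template
// parameter, matching how PD_REGISTER_STRUCT_KERNEL appears to
// instantiate it.
template <typename T, typename DeviceContext>  // was <DeviceContext, T>
class MyOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // ... the kernel body is unchanged by this migration ...
  }
};

// Before: one explicit instantiation per (DeviceContext, dtype) pair.
// REGISTER_OP_CPU_KERNEL(my_op,
//                        ops::MyOpKernel<phi::CPUContext, float>,
//                        ops::MyOpKernel<phi::CPUContext, double>);

// After: the backend (CPU/GPU) and layout become macro arguments, and the
// trailing dtype list replaces the explicit instantiations.
PD_REGISTER_STRUCT_KERNEL(
    my_op, CPU, ALL_LAYOUT, ops::MyOpKernel, float, double) {}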
@@ -214,6 +214,9 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
*(op_with_kernel->PhiKernelSignature()),
runtime_context,
*dev_ctx);
+  } else if (new_op_func_node.phi_kernel_->GetKernelRegisteredType() ==
+             phi::KernelRegisteredType::STRUCTURE) {
+    (*new_op_func_node.phi_kernel_)(&exec_ctx);
} else {
phi::KernelContext phi_kernel_context;
op_with_kernel->BuildPhiKernelContext(
......
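For context, the executor-side hunk above adds a third dispatch path for the kernels this PR migrates. The following is a simplified control-flow sketch, not runnable code: the first branch condition is paraphrased (the diff elides it), and only the STRUCTURE branch is taken verbatim from the hunk.

if (has_phi_kernel_signature) {  // paraphrased, not the actual condition
  // phi function kernel with a signature: run it against the runtime
  // context, as in the context lines above.
} else if (new_op_func_node.phi_kernel_->GetKernelRegisteredType() ==
           phi::KernelRegisteredType::STRUCTURE) {
  // Structure kernel (a fluid OpKernel re-registered in phi): invoked
  // with the fluid ExecutionContext directly; no phi::KernelContext.
  (*new_op_func_node.phi_kernel_)(&exec_ctx);
} else {
  // Ordinary phi function kernel: build a phi::KernelContext explicitly,
  // then run the kernel with it.
  phi::KernelContext phi_kernel_context;
  // op_with_kernel->BuildPhiKernelContext(...); run with the context.
}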
@@ -2468,7 +2468,7 @@ Scope* OperatorWithKernel::PrepareData(
expected_kernel_key.layout(),
expected_kernel_key.dtype());
}
-  } else if (in_def != nullptr &&
+  } else if (in_def != nullptr &&  // KernelRegisteredType is Function
in_def->backend != phi::Backend::ALL_BACKEND) {
auto tensor_backend = phi::TransToPhiBackend(tensor_in->place());
if ((in_def->backend != tensor_backend &&
......
@@ -149,9 +149,15 @@ REGISTER_OPERATOR(
REGISTER_OPERATOR(sequence_softmax_grad,
ops::SequenceSoftmaxGradOp,
ops::SequenceSoftmaxGradOpNoNeedBufferVarsInferer);
-REGISTER_OP_CPU_KERNEL(sequence_softmax,
-                       ops::SequenceSoftmaxKernel<phi::CPUContext, float>,
-                       ops::SequenceSoftmaxKernel<phi::CPUContext, double>);
-REGISTER_OP_CPU_KERNEL(sequence_softmax_grad,
-                       ops::SequenceSoftmaxGradKernel<phi::CPUContext, float>,
-                       ops::SequenceSoftmaxGradKernel<phi::CPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(sequence_softmax,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceSoftmaxKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(sequence_softmax_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceSoftmaxGradKernel,
+                          float,
+                          double) {}
@@ -177,10 +177,15 @@ struct SequenceSoftmaxGradFunctor<phi::GPUContext, T> {
} // namespace paddle
namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(sequence_softmax,
-                        ops::SequenceSoftmaxKernel<phi::GPUContext, float>,
-                        ops::SequenceSoftmaxKernel<phi::GPUContext, double>);
-REGISTER_OP_CUDA_KERNEL(
-    sequence_softmax_grad,
-    ops::SequenceSoftmaxGradKernel<phi::GPUContext, float>,
-    ops::SequenceSoftmaxGradKernel<phi::GPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(sequence_softmax,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SequenceSoftmaxKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(sequence_softmax_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SequenceSoftmaxGradKernel,
+                          float,
+                          double) {}
@@ -86,7 +86,7 @@ struct SequenceSoftmaxGradFunctor<phi::CPUContext, T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceSoftmaxKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
@@ -130,7 +130,7 @@ class SequenceSoftmaxKernel : public framework::OpKernel<T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceSoftmaxGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......
@@ -137,9 +137,13 @@ REGISTER_OPERATOR(
ops::SequenceTopkAvgPoolGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(sequence_topk_avg_pooling_grad,
ops::SequenceTopkAvgPoolingGradOp);
-REGISTER_OP_CPU_KERNEL(
-    sequence_topk_avg_pooling,
-    ops::SequenceTopkAvgPoolingKernel<phi::CPUContext, float>);
-REGISTER_OP_CPU_KERNEL(
-    sequence_topk_avg_pooling_grad,
-    ops::SequenceTopkAvgPoolingGradKernel<phi::CPUContext, float>);
+PD_REGISTER_STRUCT_KERNEL(sequence_topk_avg_pooling,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceTopkAvgPoolingKernel,
+                          float) {}
+PD_REGISTER_STRUCT_KERNEL(sequence_topk_avg_pooling_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceTopkAvgPoolingGradKernel,
+                          float) {}
@@ -67,7 +67,7 @@ static void get_topk_pos(const T* data, int length, int k, int* pos) {
}
} // namespace details
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceTopkAvgPoolingKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
@@ -178,7 +178,7 @@ class SequenceTopkAvgPoolingKernel : public framework::OpKernel<T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceTopkAvgPoolingGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......
@@ -194,14 +194,19 @@ REGISTER_OPERATOR(sequence_unpad,
REGISTER_OPERATOR(sequence_unpad_grad,
ops::SequenceUnpadGradOp,
ops::SequenceUnpadGradOpNoNeedBufferVarsInferer);
-REGISTER_OP_CPU_KERNEL(sequence_unpad,
-                       ops::SequenceUnpadOpKernel<phi::CPUContext, float>,
-                       ops::SequenceUnpadOpKernel<phi::CPUContext, double>,
-                       ops::SequenceUnpadOpKernel<phi::CPUContext, int>,
-                       ops::SequenceUnpadOpKernel<phi::CPUContext, int64_t>);
-REGISTER_OP_CPU_KERNEL(
-    sequence_unpad_grad,
-    ops::SequenceUnpadGradOpKernel<phi::CPUContext, float>,
-    ops::SequenceUnpadGradOpKernel<phi::CPUContext, double>,
-    ops::SequenceUnpadGradOpKernel<phi::CPUContext, int>,
-    ops::SequenceUnpadGradOpKernel<phi::CPUContext, int64_t>);
+PD_REGISTER_STRUCT_KERNEL(sequence_unpad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceUnpadOpKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t) {}
+PD_REGISTER_STRUCT_KERNEL(sequence_unpad_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SequenceUnpadGradOpKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t) {}
@@ -15,14 +15,19 @@ limitations under the License. */
#include "paddle/fluid/operators/sequence_ops/sequence_unpad_op.h"
namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(sequence_unpad,
-                        ops::SequenceUnpadOpKernel<phi::GPUContext, float>,
-                        ops::SequenceUnpadOpKernel<phi::GPUContext, double>,
-                        ops::SequenceUnpadOpKernel<phi::GPUContext, int>,
-                        ops::SequenceUnpadOpKernel<phi::GPUContext, int64_t>);
-REGISTER_OP_CUDA_KERNEL(
-    sequence_unpad_grad,
-    ops::SequenceUnpadGradOpKernel<phi::GPUContext, float>,
-    ops::SequenceUnpadGradOpKernel<phi::GPUContext, double>,
-    ops::SequenceUnpadGradOpKernel<phi::GPUContext, int>,
-    ops::SequenceUnpadGradOpKernel<phi::GPUContext, int64_t>);
+PD_REGISTER_STRUCT_KERNEL(sequence_unpad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SequenceUnpadOpKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t) {}
+PD_REGISTER_STRUCT_KERNEL(sequence_unpad_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SequenceUnpadGradOpKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t) {}
@@ -27,7 +27,7 @@ namespace operators {
using LoDTensor = phi::DenseTensor;
using LoD = framework::LoD;
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceUnpadOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -81,7 +81,7 @@ class SequenceUnpadOpKernel : public framework::OpKernel<T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class SequenceUnpadGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
@@ -18,6 +18,6 @@ limitations under the License. */
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(sequence_unpad,
-                       ops::SequenceUnpadOpKernel<phi::XPUContext, float>);
+                       ops::SequenceUnpadOpKernel<float, phi::XPUContext>);
#endif
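Note that the XPU build keeps the legacy REGISTER_OP_XPU_KERNEL macro in this PR; only the explicit template arguments are reordered to match the kernel class's new <T, DeviceContext> parameter list:

// XPU: legacy macro retained; template arguments swapped to the new order.
REGISTER_OP_XPU_KERNEL(sequence_unpad,
                       ops::SequenceUnpadOpKernel<float, phi::XPUContext>);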
@@ -62,18 +62,22 @@ Return a tensor $Out$ that shares data with the input tensor $X$ and without tensor copy.
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OPERATOR(
share_data,
ops::ShareDataOp,
ops::ShareDataOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OP_CPU_KERNEL(share_data,
-                       ops::ShareDataKernel<bool>,
-                       ops::ShareDataKernel<int>,
-                       ops::ShareDataKernel<int8_t>,
-                       ops::ShareDataKernel<uint8_t>,
-                       ops::ShareDataKernel<paddle::platform::float16>,
-                       ops::ShareDataKernel<int64_t>,
-                       ops::ShareDataKernel<float>,
-                       ops::ShareDataKernel<double>)
+PD_REGISTER_STRUCT_KERNEL(share_data,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::ShareDataKernel,
+                          bool,
+                          int,
+                          int8_t,
+                          uint8_t,
+                          int64_t,
+                          float,
+                          double,
+                          plat::float16) {}
@@ -14,13 +14,17 @@ limitations under the License. */
#include "paddle/fluid/operators/share_data_op.h"
-REGISTER_OP_CUDA_KERNEL(
-    share_data,
-    paddle::operators::ShareDataKernel<bool>,
-    paddle::operators::ShareDataKernel<int>,
-    paddle::operators::ShareDataKernel<int8_t>,
-    paddle::operators::ShareDataKernel<uint8_t>,
-    paddle::operators::ShareDataKernel<paddle::platform::float16>,
-    paddle::operators::ShareDataKernel<int64_t>,
-    paddle::operators::ShareDataKernel<float>,
-    paddle::operators::ShareDataKernel<double>);
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+PD_REGISTER_STRUCT_KERNEL(share_data,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::ShareDataKernel,
+                          bool,
+                          int,
+                          int8_t,
+                          uint8_t,
+                          int64_t,
+                          float,
+                          double,
+                          plat::float16) {}
@@ -18,7 +18,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
-template <typename T>
+template <typename T, typename DeviceContext>
class ShareDataKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......
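ShareDataKernel previously took only a dtype parameter, since sharing data involves no device-specific compute. The hunk above adds a DeviceContext parameter anyway, presumably because PD_REGISTER_STRUCT_KERNEL expects the <T, DeviceContext> shape; the extra parameter is carried but unused. A sketch of the resulting class, with the body paraphrased from the op's description rather than copied from the source:

template <typename T, typename DeviceContext>  // DeviceContext goes unused
class ShareDataKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    // Out shares the input tensor's buffer: no copy and no device work,
    // so this kernel never touches DeviceContext.
  }
};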
@@ -159,14 +159,19 @@ REGISTER_OPERATOR(shuffle_batch,
ops::ShuffleBatchGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(shuffle_batch_grad, ops::ShuffleBatchOpGrad);
-REGISTER_OP_CPU_KERNEL(shuffle_batch,
-                       ops::ShuffleBatchKernel<float>,
-                       ops::ShuffleBatchKernel<double>,
-                       ops::ShuffleBatchKernel<int32_t>,
-                       ops::ShuffleBatchKernel<int64_t>);
-REGISTER_OP_CPU_KERNEL(shuffle_batch_grad,
-                       ops::ShuffleBatchGradKernel<float>,
-                       ops::ShuffleBatchGradKernel<double>,
-                       ops::ShuffleBatchGradKernel<int32_t>,
-                       ops::ShuffleBatchGradKernel<int64_t>);
+PD_REGISTER_STRUCT_KERNEL(shuffle_batch,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleBatchKernel,
+                          float,
+                          double,
+                          int32_t,
+                          int64_t) {}
+PD_REGISTER_STRUCT_KERNEL(shuffle_batch_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleBatchGradKernel,
+                          float,
+                          double,
+                          int32_t,
+                          int64_t) {}
@@ -79,7 +79,7 @@ struct ReorderFunctor {
int64_t stride_;
};
-template <typename T>
+template <typename T, typename DeviceContext>
class ShuffleBatchCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
@@ -149,7 +149,7 @@ class ShuffleBatchCUDAKernel : public framework::OpKernel<T> {
}
};
-template <typename T>
+template <typename T, typename DeviceContext>
class ShuffleBatchGradCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
@@ -180,15 +180,21 @@ class ShuffleBatchGradCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(shuffle_batch,
-                        ops::ShuffleBatchCUDAKernel<float>,
-                        ops::ShuffleBatchCUDAKernel<double>,
-                        ops::ShuffleBatchCUDAKernel<int32_t>,
-                        ops::ShuffleBatchCUDAKernel<int64_t>);
-REGISTER_OP_CUDA_KERNEL(shuffle_batch_grad,
-                        ops::ShuffleBatchGradCUDAKernel<float>,
-                        ops::ShuffleBatchGradCUDAKernel<double>,
-                        ops::ShuffleBatchGradCUDAKernel<int32_t>,
-                        ops::ShuffleBatchGradCUDAKernel<int64_t>);
+PD_REGISTER_STRUCT_KERNEL(shuffle_batch,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleBatchCUDAKernel,
+                          float,
+                          double,
+                          int32_t,
+                          int64_t) {}
+PD_REGISTER_STRUCT_KERNEL(shuffle_batch_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleBatchGradCUDAKernel,
+                          float,
+                          double,
+                          int32_t,
+                          int64_t) {}
#endif
@@ -36,7 +36,7 @@ namespace operators {
template <typename T>
using Vector = phi::Vector<T>;
-template <typename T>
+template <typename T, typename DeviceContext>
class ShuffleBatchKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
@@ -122,7 +122,7 @@ class ShuffleBatchKernel : public framework::OpKernel<T> {
}
};
-template <typename T>
+template <typename T, typename DeviceContext>
class ShuffleBatchGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
......
@@ -123,11 +123,15 @@ REGISTER_OPERATOR(shuffle_channel,
REGISTER_OPERATOR(shuffle_channel_grad, ops::ShuffleChannelGradOp);
-REGISTER_OP_CPU_KERNEL(shuffle_channel,
-                       ops::ShuffleChannelOpKernel<phi::CPUContext, float>,
-                       ops::ShuffleChannelOpKernel<phi::CPUContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    shuffle_channel_grad,
-    ops::ShuffleChannelGradOpKernel<phi::CPUContext, float>,
-    ops::ShuffleChannelGradOpKernel<phi::CPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(shuffle_channel,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleChannelOpKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(shuffle_channel_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleChannelGradOpKernel,
+                          float,
+                          double) {}
@@ -43,7 +43,7 @@ __global__ void ShuffleChannel(const int nthreads,
p_o[k] = input[index];
}
}
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class ShuffleChannelOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -82,7 +82,7 @@ class ShuffleChannelOpCUDAKernel : public framework::OpKernel<T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class ShuffleChannelGradOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -126,11 +126,15 @@ class ShuffleChannelGradOpCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    shuffle_channel,
-    ops::ShuffleChannelOpCUDAKernel<phi::GPUContext, float>,
-    ops::ShuffleChannelOpCUDAKernel<phi::GPUContext, double>);
-REGISTER_OP_CUDA_KERNEL(
-    shuffle_channel_grad,
-    ops::ShuffleChannelGradOpCUDAKernel<phi::GPUContext, float>,
-    ops::ShuffleChannelGradOpCUDAKernel<phi::GPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(shuffle_channel,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleChannelOpCUDAKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(shuffle_channel_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::ShuffleChannelGradOpCUDAKernel,
+                          float,
+                          double) {}
@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class ShuffleChannelOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -54,7 +54,7 @@ class ShuffleChannelOpKernel : public framework::OpKernel<T> {
}
};
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
class ShuffleChannelGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......