未验证 提交 d9edb233 编写于 作者: H huangjiyi 提交者: GitHub

Register fluid kernels to phi [part 13] (#53037)

* update

* fix bug

* update

* fix bug
上级 7a323f78
...@@ -63,7 +63,7 @@ __global__ void YoloBoxHeadCudaKernel(const T* input, ...@@ -63,7 +63,7 @@ __global__ void YoloBoxHeadCudaKernel(const T* input,
} }
} }
template <typename T> template <typename T, typename DeviceContext>
class YoloBoxHeadKernel : public framework::OpKernel<T> { class YoloBoxHeadKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -103,4 +103,5 @@ class YoloBoxHeadKernel : public framework::OpKernel<T> { ...@@ -103,4 +103,5 @@ class YoloBoxHeadKernel : public framework::OpKernel<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(yolo_box_head, ops::YoloBoxHeadKernel<float>); PD_REGISTER_STRUCT_KERNEL(
yolo_box_head, GPU, ALL_LAYOUT, ops::YoloBoxHeadKernel, float) {}
...@@ -315,7 +315,7 @@ static void YoloTensorParseCuda( ...@@ -315,7 +315,7 @@ static void YoloTensorParseCuda(
prob_thresh); prob_thresh);
} }
template <typename T> template <typename T, typename DeviceContext>
class YoloBoxPostKernel : public framework::OpKernel<T> { class YoloBoxPostKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -555,4 +555,5 @@ class YoloBoxPostKernel : public framework::OpKernel<T> { ...@@ -555,4 +555,5 @@ class YoloBoxPostKernel : public framework::OpKernel<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(yolo_box_post, ops::YoloBoxPostKernel<float>); PD_REGISTER_STRUCT_KERNEL(
yolo_box_post, GPU, ALL_LAYOUT, ops::YoloBoxPostKernel, float) {}
...@@ -56,7 +56,7 @@ inline void UniformRealDistribution(paddle::platform::bfloat16 *data, ...@@ -56,7 +56,7 @@ inline void UniformRealDistribution(paddle::platform::bfloat16 *data,
// It seems that Eigen::Tensor::random in GPU will SEGFAULT. // It seems that Eigen::Tensor::random in GPU will SEGFAULT.
// Use std::random and thrust::random(thrust is a std library in CUDA) to // Use std::random and thrust::random(thrust is a std library in CUDA) to
// implement uniform random. // implement uniform random.
template <typename T> template <typename T, typename DeviceContext>
class CPUUniformRandomKernel : public framework::OpKernel<T> { class CPUUniformRandomKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -178,16 +178,20 @@ with random values sampled from a uniform distribution. ...@@ -178,16 +178,20 @@ with random values sampled from a uniform distribution.
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OPERATOR( REGISTER_OPERATOR(
uniform_random_batch_size_like, uniform_random_batch_size_like,
paddle::operators::UniformRandomBatchSizeLikeOp, ops::UniformRandomBatchSizeLikeOp,
paddle::operators::UniformRandomBatchSizeLikeOpMaker, ops::UniformRandomBatchSizeLikeOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>, paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
paddle::operators::BatchSizeLikeNoNeedBufferVarsInferer); ops::BatchSizeLikeNoNeedBufferVarsInferer);
REGISTER_OP_CPU_KERNEL( PD_REGISTER_STRUCT_KERNEL(uniform_random_batch_size_like,
uniform_random_batch_size_like, CPU,
paddle::operators::CPUUniformRandomKernel<float>, ALL_LAYOUT,
paddle::operators::CPUUniformRandomKernel<double>, ops::CPUUniformRandomKernel,
paddle::operators::CPUUniformRandomKernel<paddle::platform::bfloat16>); float,
double,
plat::bfloat16) {}
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename DeviceContext>
class GPUUniformRandomKernel : public framework::OpKernel<T> { class GPUUniformRandomKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -59,6 +59,10 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> { ...@@ -59,6 +59,10 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP_CUDA_KERNEL(uniform_random_batch_size_like, namespace ops = paddle::operators;
paddle::operators::GPUUniformRandomKernel<float>, PD_REGISTER_STRUCT_KERNEL(uniform_random_batch_size_like,
paddle::operators::GPUUniformRandomKernel<double>); GPU,
ALL_LAYOUT,
ops::GPUUniformRandomKernel,
float,
double) {}
...@@ -162,10 +162,7 @@ REGISTER_OPERATOR(unzip, ...@@ -162,10 +162,7 @@ REGISTER_OPERATOR(unzip,
REGISTER_OPERATOR(unzip_grad, ops::unzipGradientOp); REGISTER_OPERATOR(unzip_grad, ops::unzipGradientOp);
REGISTER_OP_CPU_KERNEL(unzip, PD_REGISTER_STRUCT_KERNEL(unzip, CPU, ALL_LAYOUT, ops::unzipOpKernel, int64_t) {
ops::unzipOpKernel<int64_t>, }
ops::unzipOpKernel<int64_t>); PD_REGISTER_STRUCT_KERNEL(
unzip_grad, CPU, ALL_LAYOUT, ops::unzipGradOpKernel, int64_t) {}
REGISTER_OP_CPU_KERNEL(unzip_grad,
ops::unzipGradOpKernel<int64_t>,
ops::unzipGradOpKernel<int64_t>);
...@@ -42,7 +42,7 @@ __global__ void unzipKernel( ...@@ -42,7 +42,7 @@ __global__ void unzipKernel(
} }
} }
template <typename T, typename LodType> template <typename T, typename DeviceContext, typename LodType = int64_t>
class unzipCUDAKernel : public framework::OpKernel<T> { class unzipCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -69,7 +69,7 @@ class unzipCUDAKernel : public framework::OpKernel<T> { ...@@ -69,7 +69,7 @@ class unzipCUDAKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class unzipGradCUDAKernel : public framework::OpKernel<T> { class unzipGradCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -81,25 +81,24 @@ class unzipGradCUDAKernel : public framework::OpKernel<T> { ...@@ -81,25 +81,24 @@ class unzipGradCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL( namespace plat = paddle::platform;
unzip, PD_REGISTER_STRUCT_KERNEL(unzip,
ops::unzipCUDAKernel<float, int>, GPU,
ops::unzipCUDAKernel<double, int>, ALL_LAYOUT,
ops::unzipCUDAKernel<paddle::platform::float16, int>, ops::unzipCUDAKernel,
ops::unzipCUDAKernel<int, int>, float,
ops::unzipCUDAKernel<bool, int>, double,
ops::unzipCUDAKernel<int64_t, int>, plat::float16,
ops::unzipCUDAKernel<float, int64_t>, bool,
ops::unzipCUDAKernel<double, int64_t>, int,
ops::unzipCUDAKernel<paddle::platform::float16, int64_t>, int64_t) {}
ops::unzipCUDAKernel<int, int64_t>, PD_REGISTER_STRUCT_KERNEL(unzip_grad,
ops::unzipCUDAKernel<bool, int64_t>, GPU,
ops::unzipCUDAKernel<int64_t, int64_t>); ALL_LAYOUT,
ops::unzipGradCUDAKernel,
REGISTER_OP_CUDA_KERNEL(unzip_grad, float,
ops::unzipGradCUDAKernel<float>, double,
ops::unzipGradCUDAKernel<double>, plat::float16,
ops::unzipGradCUDAKernel<paddle::platform::float16>, bool,
ops::unzipGradCUDAKernel<int>, int,
ops::unzipGradCUDAKernel<bool>, int64_t) {}
ops::unzipGradCUDAKernel<int64_t>);
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename DeviceContext>
class unzipOpKernel : public framework::OpKernel<T> { class unzipOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -27,7 +27,7 @@ class unzipOpKernel : public framework::OpKernel<T> { ...@@ -27,7 +27,7 @@ class unzipOpKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class unzipGradOpKernel : public framework::OpKernel<T> { class unzipGradOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册