未验证 提交 8b24c795 编写于 作者: L Leo Chen 提交者: GitHub

move gelu/gelu_grad/generate_proposals_v2 kernel to phi (#45471)

* move xpu kernel to phi

* delete fluid file

* fix compile

* add guard, test=kunlun

* xpu set constant

* fix xpu error, test=kunlun
上级 c857841e
...@@ -42,8 +42,7 @@ if(WITH_XPU) ...@@ -42,8 +42,7 @@ if(WITH_XPU)
detection_library(iou_similarity_op SRCS iou_similarity_op.cc detection_library(iou_similarity_op SRCS iou_similarity_op.cc
iou_similarity_op_xpu.cc) iou_similarity_op_xpu.cc)
detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op_xpu.cc) detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op_xpu.cc)
detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc)
generate_proposals_v2_op_xpu.cc)
elseif(WITH_MLU) elseif(WITH_MLU)
detection_library(iou_similarity_op SRCS iou_similarity_op.cc detection_library(iou_similarity_op SRCS iou_similarity_op.cc
iou_similarity_op_mlu.cc) iou_similarity_op_mlu.cc)
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T>
class GeluXPUKernel : public framework::OpKernel<T> {
  // Map the framework element type (e.g. platform::float16) onto the
  // raw type expected by the XDNN device API.
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  // Computes Out = gelu(X) on the XPU device via the XDNN library.
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto* in_tensor = ctx.Input<Tensor>("X");
    auto* out_tensor = ctx.Output<Tensor>("Out");
    auto& dev_ctx = ctx.template device_context<DeviceContext>();
    const auto* in_ptr =
        reinterpret_cast<const XPUType*>(in_tensor->data<T>());
    // mutable_data allocates the output buffer on the current place.
    auto* out_ptr = reinterpret_cast<XPUType*>(
        out_tensor->mutable_data<T>(ctx.GetPlace()));
    int ret = xpu::gelu<XPUType>(
        dev_ctx.x_context(), in_ptr, out_ptr, in_tensor->numel());
    PADDLE_ENFORCE_EQ(
        ret,
        XPU_SUCCESS,
        platform::errors::External("XPU gelu kernel return wrong value[%d %s]",
                                   ret,
                                   XPUAPIErrorMsg[ret]));
  }
};
template <typename DeviceContext, typename T>
class GeluGradXPUKernel : public framework::OpKernel<T> {
  // Raw element type consumed by the XDNN device API.
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  // Computes dX = gelu_grad(X, dOut) on the XPU device.
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto* in_tensor = ctx.Input<Tensor>("X");
    const auto* dout_tensor = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx_tensor = ctx.Output<Tensor>(framework::GradVarName("X"));
    auto& dev_ctx = ctx.template device_context<DeviceContext>();
    const auto* in_ptr =
        reinterpret_cast<const XPUType*>(in_tensor->data<T>());
    const auto* dout_ptr =
        reinterpret_cast<const XPUType*>(dout_tensor->data<T>());
    // mutable_data allocates the gradient output buffer on the current place.
    auto* dx_ptr = reinterpret_cast<XPUType*>(
        dx_tensor->mutable_data<T>(ctx.GetPlace()));
    // Second argument (forward output y) is unused by xpu::gelu_grad,
    // hence nullptr.
    int ret = xpu::gelu_grad<XPUType>(dev_ctx.x_context(),
                                      in_ptr,
                                      nullptr,
                                      dout_ptr,
                                      dx_ptr,
                                      dout_tensor->numel());
    PADDLE_ENFORCE_EQ(ret,
                      XPU_SUCCESS,
                      platform::errors::External(
                          "XPU gelu_grad kernel return wrong value[%d %s]",
                          ret,
                          XPUAPIErrorMsg[ret]));
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// Register the fluid XPU kernels for gelu and gelu_grad with float and
// float16 element types. NOTE(review): this commit appears to migrate these
// kernels to phi (see the PD_REGISTER_KERNEL blocks later in the diff), so
// these fluid registrations are the ones being replaced.
REGISTER_OP_XPU_KERNEL(
    gelu,
    ops::GeluXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::GeluXPUKernel<paddle::platform::XPUDeviceContext,
                       paddle::platform::float16>);
REGISTER_OP_XPU_KERNEL(
    gelu_grad,
    ops::GeluGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
    ops::GeluGradXPUKernel<paddle::platform::XPUDeviceContext,
                           paddle::platform::float16>);
...@@ -53,6 +53,22 @@ struct SetConstant { ...@@ -53,6 +53,22 @@ struct SetConstant {
T num); T num);
}; };
#ifdef PADDLE_WITH_XPU
// Partial specializations of SetConstant for the two XPU device-context
// types. Declared here so the XPU path can bypass the generic
// Eigen-based implementation (XPU memory is not Eigen-addressable).
template <typename T>
struct SetConstant<XPUContext, T> {
  void operator()(const XPUContext& context,
                  paddle::framework::Tensor* tensor,
                  T num);
};
// Same specialization for the legacy fluid XPUDeviceContext alias;
// kept separate because both context types are still in use during the
// fluid -> phi migration.
template <typename T>
struct SetConstant<paddle::platform::XPUDeviceContext, T> {
  void operator()(const paddle::platform::XPUDeviceContext& context,
                  paddle::framework::Tensor* tensor,
                  T num);
};
#endif
template <typename Place> template <typename Place>
void set_constant_with_place(const paddle::platform::DeviceContext& context, void set_constant_with_place(const paddle::platform::DeviceContext& context,
paddle::framework::Tensor* tensor, paddle::framework::Tensor* tensor,
......
...@@ -27,21 +27,28 @@ using paddle::framework::To32BitIndex; ...@@ -27,21 +27,28 @@ using paddle::framework::To32BitIndex;
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
void SetConstant<DeviceContext, T>::operator()( void SetConstant<DeviceContext, T>::operator()(
const DeviceContext& context, paddle::framework::Tensor* tensor, T num) { const DeviceContext& context, paddle::framework::Tensor* tensor, T num) {
bool xpu_place = false;
#ifdef PADDLE_WITH_XPU
if (paddle::platform::is_xpu_place(context.GetPlace())) {
xpu_place = true;
phi::VisitDataType(
tensor->dtype(),
TensorSetConstantXPU<T>(tensor, num, context.GetPlace()));
}
#endif
if (!xpu_place) {
auto t = paddle::framework::EigenVector<T>::Flatten(*tensor); auto t = paddle::framework::EigenVector<T>::Flatten(*tensor);
t.device(*context.eigen_device()) = t.constant(static_cast<T>(num)); t.device(*context.eigen_device()) = t.constant(static_cast<T>(num));
}
} }
#ifdef PADDLE_WITH_XPU
// XPU specializations: fill the tensor via TensorSetConstantXPU instead of
// the generic Eigen expression, since Eigen cannot write XPU device memory.
// VisitDataType dispatches on the tensor's runtime dtype.
template <typename T>
void SetConstant<XPUContext, T>::operator()(const XPUContext& context,
                                            paddle::framework::Tensor* tensor,
                                            T num) {
  phi::VisitDataType(tensor->dtype(),
                     TensorSetConstantXPU<T>(tensor, num, context.GetPlace()));
}

// Identical implementation for the legacy fluid XPUDeviceContext; both
// context types coexist during the fluid -> phi migration.
template <typename T>
void SetConstant<paddle::platform::XPUDeviceContext, T>::operator()(
    const paddle::platform::XPUDeviceContext& context,
    paddle::framework::Tensor* tensor,
    T num) {
  phi::VisitDataType(tensor->dtype(),
                     TensorSetConstantXPU<T>(tensor, num, context.GetPlace()));
}
#endif
template <typename DeviceContext, typename T, int Rank> template <typename DeviceContext, typename T, int Rank>
void Transpose<DeviceContext, T, Rank>::operator()( void Transpose<DeviceContext, T, Rank>::operator()(
const DeviceContext& context, const DeviceContext& context,
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/gelu_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
// Computes x_grad = gelu_grad(x, out_grad) on the XPU backend.
// `approximate` is accepted for kernel-signature compatibility but not
// forwarded — the XDNN gelu_grad API exposes no approximate variant.
template <typename T, typename Context>
void GeluGradKernel(const Context& dev_ctx,
                    const DenseTensor& x,
                    const DenseTensor& out_grad,
                    bool approximate,
                    DenseTensor* x_grad) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  // Allocate the gradient output buffer of element type T.
  dev_ctx.template Alloc<T>(x_grad);
  const auto* x_ptr = reinterpret_cast<const XPUType*>(x.data<T>());
  const auto* dout_ptr = reinterpret_cast<const XPUType*>(out_grad.data<T>());
  auto* dx_ptr = reinterpret_cast<XPUType*>(x_grad->data<T>());
  // The second argument (forward output y) is unused by xpu::gelu_grad.
  int ret = xpu::gelu_grad<XPUType>(dev_ctx.x_context(),
                                    x_ptr,
                                    nullptr,
                                    dout_ptr,
                                    dx_ptr,
                                    x_grad->numel());
  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "gelu_grad");
}
} // namespace phi
// Register the phi gelu_grad kernel for the XPU backend with float and
// float16 element types, accepting any data layout.
PD_REGISTER_KERNEL(gelu_grad,
                   XPU,
                   ALL_LAYOUT,
                   phi::GeluGradKernel,
                   float,
                   phi::dtype::float16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/gelu_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
// Computes out = gelu(x) on the XPU backend.
// `approximate` is accepted for kernel-signature compatibility but not
// forwarded — the XDNN gelu API exposes no approximate variant.
template <typename T, typename Context>
void GeluKernel(const Context& dev_ctx,
                const DenseTensor& x,
                bool approximate,
                DenseTensor* out) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  // Allocate the output buffer of element type T.
  dev_ctx.template Alloc<T>(out);
  const auto* x_ptr = reinterpret_cast<const XPUType*>(x.data<T>());
  auto* out_ptr = reinterpret_cast<XPUType*>(out->data<T>());
  int ret = xpu::gelu<XPUType>(
      dev_ctx.x_context(), x_ptr, out_ptr, out->numel());
  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "gelu");
}
} // namespace phi
// Register the phi gelu kernel for the XPU backend with float and float16
// element types, accepting any data layout.
PD_REGISTER_KERNEL(
    gelu, XPU, ALL_LAYOUT, phi::GeluKernel, float, phi::dtype::float16) {}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册