Unverified commit ceec1e21 authored by zhangyk0314, committed by GitHub

Add exp, abs_grad, reciprocal, reciprocal_grad operators for XPU and update xpu2_op_list.h, test=kunlun (#38570)
Parent 1fa6900e
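For context, a minimal Python sketch of how the kernels added here might be exercised on an XPU device (not part of this change; the "xpu" device string and the dygraph calls assume a Paddle build with XPU support, and since exp only gains a forward kernel here, the backward pass below goes through abs_grad and reciprocal_grad):

import paddle

# Hypothetical smoke test for the newly registered XPU activation kernels.
if paddle.is_compiled_with_xpu():
    paddle.set_device("xpu")  # assumption: XPU build with at least one device
    x = paddle.to_tensor([[0.5, 1.5], [2.0, 4.0]], dtype="float32", stop_gradient=False)
    y = paddle.exp(x)                         # exp: forward-only XPU kernel in this change
    z = paddle.reciprocal(x) + paddle.abs(x)  # both now have XPU backward kernels
    z.sum().backward()                        # exercises reciprocal_grad and abs_grad
    print(y)
    print(x.grad)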
@@ -36,7 +36,7 @@ ENDIF()
if(NOT DEFINED XPU_BASE_URL)
SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20211226")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20211228")
else()
SET(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
@@ -98,29 +98,29 @@ void xpu_activation_backward(
}
template <typename T>
struct XPUReluFunctor : public BaseActivationFunctor<T> {
struct XPUAbsFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::relu<XPUType>);
ctx, xpu::abs<XPUType>);
}
};
template <typename T>
struct XPUSigmoidFunctor : public BaseActivationFunctor<T> {
struct XPUAbsGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::sigmoid<XPUType>);
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::abs_grad<XPUType>);
}
};
template <typename T>
struct XPUTanhFunctor : public BaseActivationFunctor<T> {
struct XPUExpFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::tanh<XPUType>);
ctx, xpu::exp<XPUType>);
}
};
@@ -134,119 +134,83 @@ struct XPULogFunctor : public BaseActivationFunctor<T> {
};
template <typename T>
struct XPUSquareFunctor : public BaseActivationFunctor<T> {
struct XPUReciprocalFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::square<XPUType>);
ctx, xpu::reciprocal<XPUType>);
}
};
template <typename T>
struct XPUSqrtFunctor : public BaseActivationFunctor<T> {
struct XPUReciprocalGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::sqrt<XPUType>);
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::reciprocal_grad<XPUType>);
}
};
template <typename T>
struct XPUAbsFunctor : public BaseActivationFunctor<T> {
struct XPUReluFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::abs<XPUType>);
ctx, xpu::relu<XPUType>);
}
};
template <typename T>
struct XPUPowFunctor : public BaseActivationFunctor<T> {
struct XPUReluGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
auto pow_factor = ctx.Attr<float>("factor");
const T *x_data = x->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());
T *factor_data = nullptr;
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void **>(&factor_data),
x->numel() * sizeof(T)),
XPU_SUCCESS, platform::errors::ResourceExhausted(
"XPU has no enough memory"));
int r = xpu::constant<T>(xpu_context, factor_data, x->numel(), pow_factor);
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::External("XPU constant op return"
" wrong value[%d %s] in pow op.",
r, XPUAPIErrorMsg[r]));
r = xpu::pow(xpu_context, x_data, factor_data, y_data, x->numel());
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::External("XPU pow op return"
" wrong value[%d %s].",
r, XPUAPIErrorMsg[r]));
if (xpu_context->xpu_stream != nullptr) {
xpu_wait(xpu_context->xpu_stream);
}
xpu_free(factor_data);
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::relu_grad<XPUType>);
}
};
template <typename T>
struct XPUHardSwishFunctor : public BaseActivationFunctor<T> {
struct XPUSigmoidFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
float threshold = ctx.Attr<float>("threshold");
float scale = ctx.Attr<float>("scale");
float offset = ctx.Attr<float>("offset");
PADDLE_ENFORCE_EQ(threshold, 6.0f,
platform::errors::External(
"Not support threshold [%f] in XPU", threshold));
PADDLE_ENFORCE_EQ(scale, 6.0f, platform::errors::External(
"Not support scale [%f] in XPU", scale));
PADDLE_ENFORCE_EQ(
offset, 3.0f,
platform::errors::External("Not support offset [%f] in XPU", offset));
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::hard_swish<XPUType>);
ctx, xpu::sigmoid<XPUType>);
}
};
template <typename T>
struct XPUReluGradFunctor : public BaseActivationFunctor<T> {
struct XPUSigmoidGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::relu_grad<XPUType>);
ctx, xpu::sigmoid_grad<XPUType>);
}
};
template <typename T>
struct XPUTanhGradFunctor : public BaseActivationFunctor<T> {
struct XPUSqrtFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::tanh_grad<XPUType>);
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::sqrt<XPUType>);
}
};
template <typename T>
struct XPUSigmoidGradFunctor : public BaseActivationFunctor<T> {
struct XPUSqrtGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::sigmoid_grad<XPUType>);
ctx, xpu::sqrt_grad<XPUType>);
}
};
template <typename T>
struct XPUSqrtGradFunctor : public BaseActivationFunctor<T> {
struct XPUSquareFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::sqrt_grad<XPUType>);
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::square<XPUType>);
}
};
@@ -259,6 +223,44 @@ struct XPUSquareGradFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct XPUTanhFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::tanh<XPUType>);
}
};
template <typename T>
struct XPUTanhGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::tanh_grad<XPUType>);
}
};
template <typename T>
struct XPUHardSwishFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
void operator()(const framework::ExecutionContext &ctx) const {
float threshold = ctx.Attr<float>("threshold");
float scale = ctx.Attr<float>("scale");
float offset = ctx.Attr<float>("offset");
PADDLE_ENFORCE_EQ(threshold, 6.0f,
platform::errors::External(
"Not support threshold [%f] in XPU", threshold));
PADDLE_ENFORCE_EQ(scale, 6.0f, platform::errors::External(
"Not support scale [%f] in XPU", scale));
PADDLE_ENFORCE_EQ(
offset, 3.0f,
platform::errors::External("Not support offset [%f] in XPU", offset));
xpu_activation_forward<paddle::platform::XPUDeviceContext, T, XPUType>(
ctx, xpu::hard_swish<XPUType>);
}
};
template <typename T>
struct XPUHardSwishGradFunctor : public BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
@@ -328,6 +330,40 @@ struct XPULeakyReluGradFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct XPUPowFunctor : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
auto pow_factor = ctx.Attr<float>("factor");
const T *x_data = x->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());
T *factor_data = nullptr;
auto xpu_context =
ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void **>(&factor_data),
x->numel() * sizeof(T)),
XPU_SUCCESS, platform::errors::ResourceExhausted(
"XPU has no enough memory"));
int r = xpu::constant<T>(xpu_context, factor_data, x->numel(), pow_factor);
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::External("XPU constant op return"
" wrong value[%d %s] in pow op.",
r, XPUAPIErrorMsg[r]));
r = xpu::pow(xpu_context, x_data, factor_data, y_data, x->numel());
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::External("XPU pow op return wrong value[%d %s].", r,
XPUAPIErrorMsg[r]));
if (xpu_context->xpu_stream != nullptr) {
xpu_wait(xpu_context->xpu_stream);
}
xpu_free(factor_data);
}
};
} // namespace operators
} // namespace paddle
@@ -340,15 +376,18 @@ namespace ops = paddle::operators;
act_type##_grad, \
ops::XPUActivationGradKernel<ops::grad_functor<float>>);
REGISTER_ACTIVATION_XPU_KERNEL(abs, XPUAbsFunctor, XPUAbsGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(hard_swish, XPUHardSwishFunctor,
XPUHardSwishGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(leaky_relu, XPULeakyReluFunctor,
XPULeakyReluGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(reciprocal, XPUReciprocalFunctor,
XPUReciprocalGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(relu, XPUReluFunctor, XPUReluGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, XPUSigmoidFunctor,
XPUSigmoidGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSquareGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(hard_swish, XPUHardSwishFunctor,
XPUHardSwishGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(leaky_relu, XPULeakyReluFunctor,
XPULeakyReluGradFunctor)
REGISTER_OP_XPU_KERNEL(
tanh, ops::XPUActivationKernel<ops::XPUTanhFunctor<float>>,
@@ -358,11 +397,11 @@ REGISTER_OP_XPU_KERNEL(
ops::XPUActivationGradKernel<
ops::XPUTanhGradFunctor<paddle::platform::float16>>);
REGISTER_OP_XPU_KERNEL(exp,
ops::XPUActivationKernel<ops::XPUExpFunctor<float>>);
REGISTER_OP_XPU_KERNEL(log,
ops::XPUActivationKernel<ops::XPULogFunctor<float>>);
REGISTER_OP_XPU_KERNEL(pow,
ops::XPUActivationKernel<ops::XPUPowFunctor<float>>);
REGISTER_OP_XPU_KERNEL(abs,
ops::XPUActivationKernel<ops::XPUAbsFunctor<float>>);
#endif // PADDLE_WITH_XPU
@@ -29,6 +29,9 @@ using XPUOpMap = std::unordered_map<std::string, XPUKernelSet>;
XPUOpMap& get_kl2_ops() {
// Ops supported by KL2, indexed by op_name, data_type, and place
static XPUOpMap s_xpu2_kernels{
{"abs", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"abs_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"adamw", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"adam", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"arg_max", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
@@ -106,6 +109,7 @@ XPUOpMap& get_kl2_ops() {
{"equal", XPUKernelSet({pOpKernelType(vartype::INT64, XPUPlace()),
pOpKernelType(vartype::INT32, XPUPlace()),
pOpKernelType(vartype::FP32, XPUPlace())})},
{"exp", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"expand_as_v2",
XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()),
pOpKernelType(vartype::INT64, XPUPlace()),
@@ -185,6 +189,9 @@ XPUOpMap& get_kl2_ops() {
XPUKernelSet({pOpKernelType(vartype::INT64, XPUPlace()),
pOpKernelType(vartype::INT32, XPUPlace()),
pOpKernelType(vartype::FP32, XPUPlace())})},
{"hard_swish_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"iou_similarity",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"label_smooth",
@@ -227,6 +234,10 @@ XPUOpMap& get_kl2_ops() {
{"momentum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"mul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"nearest_interp_v2",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"nearest_interp_v2_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"not_equal", XPUKernelSet({pOpKernelType(vartype::INT64, XPUPlace()),
pOpKernelType(vartype::INT32, XPUPlace()),
pOpKernelType(vartype::FP32, XPUPlace())})},
@@ -239,6 +250,10 @@ XPUOpMap& get_kl2_ops() {
{"prior_box", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"range", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::INT64, XPUPlace())})},
{"reciprocal", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reciprocal_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"reduce_max_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_max", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
@@ -273,6 +288,9 @@ XPUOpMap& get_kl2_ops() {
pOpKernelType(vartype::FP32, XPUPlace())})},
{"shape", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::INT64, XPUPlace())})},
{"sigmoid", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"sigmoid_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"slice_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace()),
pOpKernelType(vartype::INT32, XPUPlace())})},
@@ -154,6 +154,11 @@ class TestXPUAbs(TestXPUActivation):
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
def test_check_grad(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X'], 'Out')
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
@@ -334,6 +339,25 @@ def leaky_relu(x, alpha):
return y_ref.astype(x.dtype)
class TestXPUReciprocal(TestXPUActivation):
def setUp(self):
self.op_type = "reciprocal"
self.init_dtype()
np.random.seed(1024)
x = np.random.uniform(1, 2, [1111, 1117]).astype(self.dtype)
out = np.reciprocal(x)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
self.attrs = {'use_xpu': True}
def test_check_grad(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X'], 'Out')
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
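Not part of this diff: a companion test for the new exp forward kernel could mirror TestXPUReciprocal above. The class below is a hypothetical sketch; it omits test_check_grad because this change adds no exp_grad kernel for XPU.

@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUExp(TestXPUActivation):
    def setUp(self):
        self.op_type = "exp"
        self.init_dtype()

        np.random.seed(1024)
        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
        out = np.exp(x)

        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.outputs = {'Out': out}
        self.attrs = {'use_xpu': True}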