Unverified commit 9f99b591, authored by houj04, committed by GitHub

add softplus op for kunlun2. test=kunlun (#39555)

* add softplus op for kunlun2. test=kunlun

* add softplus op for kunlun2. test=kunlun

* fix code style. test=kunlun

* fix code style. test=kunlun

* add more test cases. test=kunlun
Parent c1c5c1fc
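For context, Paddle's softplus computes out = log(1 + exp(beta * x)) / beta and falls back to the identity once beta * x exceeds threshold (avoiding overflow); this is the behaviour the new XPU kernels below are checked against via the ref_softplus helper added in the test file. A minimal NumPy sketch of that definition, for illustration only (not part of the patch):

```python
import numpy as np

def softplus_reference(x, beta=1.0, threshold=20.0):
    """Reference softplus: log(1 + exp(beta * x)) / beta, linear past threshold."""
    x_beta = beta * x
    # np.where evaluates both branches, so clip the exp argument to keep the
    # discarded branch from overflowing for large inputs.
    safe = np.minimum(x_beta, threshold)
    return np.where(x_beta <= threshold, np.log1p(np.exp(safe)) / beta, x)
```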
......@@ -36,7 +36,7 @@ ENDIF()
if(NOT DEFINED XPU_BASE_URL)
SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220119")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220215")
else()
SET(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
......
......@@ -14,8 +14,10 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/activation_op.h"
#include <string>
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
namespace paddle {
......@@ -364,6 +366,50 @@ struct XPUPowFunctor : public BaseActivationFunctor<T> {
  }
};

template <typename T>
struct XPUSoftPlusFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    const auto *x = ctx.Input<Tensor>("X");
    auto *y = ctx.Output<Tensor>("Out");
    const T *x_data = x->data<T>();
    T *y_data = y->mutable_data<T>(ctx.GetPlace());

    float beta = ctx.Attr<float>("beta");
    float threshold = ctx.Attr<float>("threshold");

    auto xpu_context =
        ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
    int r =
        xpu::softplus(xpu_context, x_data, y_data, x->numel(), beta, threshold);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "softplus");
  }
};

template <typename T>
struct XPUSoftPlusGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    const auto *x = ctx.Input<Tensor>("X");
    auto *dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto *dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
    const T *x_data = x->data<T>();
    const T *y_grad = dOut->data<T>();
    T *x_grad = dX->mutable_data<T>(ctx.GetPlace());

    float beta = ctx.Attr<float>("beta");
    float threshold = ctx.Attr<float>("threshold");

    auto xpu_context =
        ctx.device_context<paddle::platform::XPUDeviceContext>().x_context();
    int r = xpu::softplus_grad(
        xpu_context, reinterpret_cast<const float *>(x_data),
        reinterpret_cast<const float *>(
            x_data),  // softplus_grad does not need y_data
        reinterpret_cast<const float *>(y_grad),
        reinterpret_cast<float *>(x_grad), dX->numel(), beta, threshold);
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "softplus_grad");
  }
};

}  // namespace operators
}  // namespace paddle
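The backward functor forwards X, dOut, and the beta/threshold attributes to xpu::softplus_grad; the X pointer is passed twice because, per the in-code comment, the kernel does not consume Y. Mathematically, the expected gradient is dX = dOut * sigmoid(beta * x) in the unsaturated region and dX = dOut where beta * x > threshold, since the op behaves as the identity there. A small NumPy sketch of that expectation (an illustration of the standard softplus derivative, not the XDNN implementation itself):

```python
import numpy as np

def softplus_grad_reference(x, dout, beta=1.0, threshold=20.0):
    # d(softplus)/dx = sigmoid(beta * x); past the threshold the op is the
    # identity, so the incoming gradient passes through unchanged.
    x_beta = beta * x
    local_grad = np.where(x_beta <= threshold,
                          1.0 / (1.0 + np.exp(-x_beta)),
                          1.0)
    return dout * local_grad
```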
......@@ -388,6 +434,8 @@ REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, XPUSigmoidFunctor,
XPUSigmoidGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSquareGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(softplus, XPUSoftPlusFunctor,
XPUSoftPlusGradFunctor)
REGISTER_OP_XPU_KERNEL(
tanh, ops::XPUActivationKernel<ops::XPUTanhFunctor<float>>,
......
......@@ -317,6 +317,9 @@ XPUOpMap& get_kl2_ops() {
pOpKernelType(vartype::FP16, XPUPlace())})},
{"softmax_with_cross_entropy",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"softplus", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"softplus_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"split", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::INT32, XPUPlace())})},
{"squeeze2_grad",
......
......@@ -358,6 +358,59 @@ class TestXPUReciprocal(TestXPUActivation):
            self.check_grad_with_place(place, ['X'], 'Out')


@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUSoftPlus(TestXPUActivation):
    def setUp(self):
        self.op_type = "softplus"
        self.init_dtype()
        self.init_config()

        beta = np.random.uniform(0, 1)
        threshold = np.random.uniform(0, 1)
        out = ref_softplus(self.x, beta, threshold)
        self.inputs = {'X': self.x}
        self.outputs = {'Out': out}
        self.attrs = {'use_xpu': True, 'beta': beta, 'threshold': threshold}

    def init_config(self):
        self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)

    def test_check_grad(self):
        if paddle.is_compiled_with_xpu():
            place = paddle.XPUPlace(0)
            self.check_grad_with_place(place, ['X'], 'Out')


@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUSoftPlus2(TestXPUSoftPlus):
    def init_config(self):
        self.x = np.random.uniform(-2, 2, [1024, 8]).astype(self.dtype)


@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUSoftPlus3(TestXPUSoftPlus):
    def init_config(self):
        self.x = np.random.uniform(-2, 2, [4, 512, 15, 15]).astype(self.dtype)


@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUSoftPlus4(TestXPUSoftPlus):
    def init_config(self):
        self.x = np.random.uniform(-2, 2, [4, 256, 22, 22]).astype(self.dtype)


def ref_softplus(x, beta=1, threshold=20):
    x_beta = beta * x
    out = np.select([x_beta <= threshold, x_beta > threshold],
                    [np.log(1 + np.exp(x_beta)) / beta, x])
    return out


if __name__ == "__main__":
    paddle.enable_static()
    unittest.main()
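For completeness, a hypothetical end-to-end usage sketch of the new kernels through the Python API; it assumes a Paddle build with XPU support and an available device 0, and is not part of this patch:

```python
import numpy as np
import paddle
import paddle.nn.functional as F

# Dispatch to the FP32 XPU kernels registered above.
paddle.set_device('xpu:0')

x = paddle.to_tensor(np.random.uniform(-2, 2, [4, 8]).astype('float32'))
x.stop_gradient = False

y = F.softplus(x, beta=1.0, threshold=20.0)  # forward -> xpu::softplus
y.sum().backward()                           # backward -> xpu::softplus_grad
print(y.shape, x.grad.shape)
```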