diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index a709616314b174bf204d17becdb60b99d2eab895..c9cf2572d1d5c4129f51c5262181626fb2415c00 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -4,7 +4,7 @@ endif()
 
 INCLUDE(ExternalProject)
 SET(XPU_PROJECT "extern_xpu")
-SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2020_12_04.tar.gz" CACHE STRING "" FORCE)
+SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2020_12_07_cdfbf0c.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
 SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
 SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
diff --git a/paddle/fluid/operators/activation_op_xpu.cc b/paddle/fluid/operators/activation_op_xpu.cc
index 49b7a08a7b52b0a1dd110215aad301f1b484317e..48e55e8f61222d3802843ca13983df6c6c95022c 100644
--- a/paddle/fluid/operators/activation_op_xpu.cc
+++ b/paddle/fluid/operators/activation_op_xpu.cc
@@ -61,13 +61,38 @@ void xpu_activation_forward(const framework::ExecutionContext &ctx,
   const T *x_data = x->data<T>();
   T *y_data = y->mutable_data<T>(ctx.GetPlace());
   int r = 0;
-  if (xpu::Activation_t::ACT_POW == type.type) {
-    type.pow_factor = ctx.Attr<float>("factor");
-  }
   auto xpu_context = ctx.device_context<DeviceContext>().x_context();
-  r = xpu::activation_forward(xpu_context, type, x->numel(),
-                              reinterpret_cast<const float *>(x_data),
-                              reinterpret_cast<float *>(y_data));
+
+  switch (type.type) {
+    case xpu::Activation_t::HARD_SWISH: {
+      float threshold = ctx.Attr<float>("threshold");
+      float scale = ctx.Attr<float>("scale");
+      float offset = ctx.Attr<float>("offset");
+      PADDLE_ENFORCE_EQ(threshold, 6.0f,
+                        platform::errors::External(
+                            "Not support threshold [%f] in XPU", threshold));
+      PADDLE_ENFORCE_EQ(
+          scale, 6.0f,
+          platform::errors::External("Not support scale [%f] in XPU", scale));
+      PADDLE_ENFORCE_EQ(
+          offset, 3.0f,
+          platform::errors::External("Not support offset [%f] in XPU", offset));
+
+      r = xpu::hard_swish(xpu_context, reinterpret_cast<const float *>(x_data),
+                          reinterpret_cast<float *>(y_data), x->numel());
+      break;
+    }
+    case xpu::Activation_t::ACT_POW: {
+      type.pow_factor = ctx.Attr<float>("factor");
+    }
+    default: {
+      r = xpu::activation_forward(xpu_context, type, x->numel(),
+                                  reinterpret_cast<const float *>(x_data),
+                                  reinterpret_cast<float *>(y_data));
+      break;
+    }
+  }
+
   PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                     platform::errors::External(
                         "XPU API return wrong value[%d], please check whether "
@@ -90,12 +115,40 @@ void xpu_activation_backward(const framework::ExecutionContext &ctx,
   if (y != nullptr) y_data = y->data<T>();
   if (dOut != nullptr) y_grad = dOut->data<T>();
   T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
+  int r = 0;
   auto xpu_context = ctx.device_context<DeviceContext>().x_context();
-  int r = xpu::activation_backward(xpu_context, type, dX->numel(),
+
+  switch (type.type) {
+    case xpu::Activation_t::HARD_SWISH: {
+      float threshold = ctx.Attr<float>("threshold");
+      float scale = ctx.Attr<float>("scale");
+      float offset = ctx.Attr<float>("offset");
+      PADDLE_ENFORCE_EQ(threshold, 6.0f,
+                        platform::errors::External(
+                            "Not support threshold [%f] in XPU", threshold));
+      PADDLE_ENFORCE_EQ(
+          scale, 6.0f,
+          platform::errors::External("Not support scale [%f] in XPU", scale));
+      PADDLE_ENFORCE_EQ(
+          offset, 3.0f,
+          platform::errors::External("Not support offset [%f] in XPU", offset));
+      r = xpu::hard_swish_grad(xpu_context,
+                               reinterpret_cast<const float *>(x_data),
+                               reinterpret_cast<const float *>(y_data),
+                               reinterpret_cast<const float *>(y_grad),
+                               reinterpret_cast<float *>(x_grad), dX->numel());
+      break;
+    }
+    default: {
+      r = xpu::activation_backward(xpu_context, type, dX->numel(),
                                    reinterpret_cast<const float *>(x_data),
                                    reinterpret_cast<const float *>(y_data),
                                    reinterpret_cast<const float *>(y_grad),
                                    reinterpret_cast<float *>(x_grad));
+      break;
+    }
+  }
+
   PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                     platform::errors::External(
                         "XPU API return wrong value[%d], please check whether "
@@ -132,6 +185,8 @@ using XPULogFunctor = XPUActivationFunc<T, xpu::Activation_t::LOG>;
 template <typename T>
 using XPUSquareFunctor = XPUActivationFunc<T, xpu::Activation_t::SQUARE>;
 template <typename T>
+using XPUHardSwishFunctor = XPUActivationFunc<T, xpu::Activation_t::HARD_SWISH>;
+template <typename T>
 using XPUSuareGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::SQUARE>;
 template <typename T>
 using XPUReluGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::RELU>;
@@ -147,6 +202,9 @@ using XPUSqrtFunctor = XPUActivationFunc<T, xpu::Activation_t::SQRT>;
 template <typename T>
 using XPUSqrtGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::SQRT>;
 template <typename T>
+using XPUHardSwishGradFunctor =
+    XPUActivationGradFunc<T, xpu::Activation_t::HARD_SWISH>;
+template <typename T>
 using XPUACTPowFunctor = XPUActivationFunc<T, xpu::Activation_t::ACT_POW>;
 template <typename T>
 using XPUABSFunctor = XPUActivationFunc<T, xpu::Activation_t::ABS>;
@@ -169,6 +227,8 @@ REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, XPUSigmoidFunctor,
 REGISTER_ACTIVATION_XPU_KERNEL(gelu, XPUGeluFunctor, XPUGeluGradFunctor)
 REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor)
 REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSuareGradFunctor)
+REGISTER_ACTIVATION_XPU_KERNEL(hard_swish, XPUHardSwishFunctor,
+                               XPUHardSwishGradFunctor)
 REGISTER_OP_XPU_KERNEL(log,
                        ops::XPUActivationKernel<ops::XPULogFunctor<float>>);
 REGISTER_OP_XPU_KERNEL(pow,
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py
old mode 100755
new mode 100644
index 788c110a592c0e18734e2b361a7edcdbc691230a..8635a7db361c1aecc1d2ba8880023c704203fac6
--- a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py
@@ -20,6 +20,7 @@ import unittest
 import numpy as np
 import paddle.fluid.core as core
 from op_test import OpTest
+from op_test_xpu import XPUOpTest
 from scipy.special import expit, erf
 import paddle
 import paddle.fluid as fluid
@@ -30,7 +31,7 @@ from paddle.fluid import compiler, Program, program_guard
 
 @unittest.skipIf(not paddle.is_compiled_with_xpu(),
                  "core is not compiled with XPU")
-class TestXPUActivation(OpTest):
+class TestXPUActivation(XPUOpTest):
     def setUp(self):
         self.op_type = "exp"
         self.init_dtype()
@@ -166,6 +167,33 @@ def gelu(x, approximate):
     return y_ref.astype(x.dtype)
 
 
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUHardSwish(TestXPUActivation):
+    def setUp(self):
+        self.op_type = "hard_swish"
+        self.init_dtype()
+        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
+        offset = 3.0
+        threshold = 6.0
+        scale = 6.0
+        out = hard_swish(x, offset, threshold, scale)
+
+        self.inputs = {'X': x}
+        self.outputs = {'Out': out}
+        self.attrs = {'use_xpu': True}
+
+    def test_check_grad(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_grad_with_place(place, ['X'], 'Out')
+
+
+def hard_swish(x, offset, threshold, scale):
+    y_ref = np.minimum(threshold, np.maximum(0, x + offset)) * x / scale
+    return y_ref.astype(x.dtype)
+
+
 @unittest.skipIf(not paddle.is_compiled_with_xpu(),
                  "core is not compiled with XPU")
 class TestXPULog(TestXPUActivation):