Unverified · Commit 3a055833 authored by procr, committed by GitHub

support mobilenet for kunlun (#29458)

Parent ec26a26a
@@ -4,7 +4,7 @@ endif()
 INCLUDE(ExternalProject)
 SET(XPU_PROJECT "extern_xpu")
-SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2020_12_04.tar.gz" CACHE STRING "" FORCE)
+SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2020_12_07_cdfbf0c.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
 SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
 SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
......
@@ -61,13 +61,38 @@ void xpu_activation_forward(const framework::ExecutionContext &ctx,
   const T *x_data = x->data<T>();
   T *y_data = y->mutable_data<T>(ctx.GetPlace());
   int r = 0;
-  if (xpu::Activation_t::ACT_POW == type.type) {
-    type.pow_factor = ctx.Attr<float>("factor");
-  }
-  auto xpu_context = ctx.device_context<DeviceContext>().x_context();
-  r = xpu::activation_forward(xpu_context, type, x->numel(),
-                              reinterpret_cast<const float *>(x_data),
-                              reinterpret_cast<float *>(y_data));
+  auto xpu_context = ctx.device_context<DeviceContext>().x_context();
+
+  switch (type.type) {
+    case xpu::Activation_t::HARD_SWISH: {
+      float threshold = ctx.Attr<float>("threshold");
+      float scale = ctx.Attr<float>("scale");
+      float offset = ctx.Attr<float>("offset");
+      PADDLE_ENFORCE_EQ(threshold, 6.0f,
+                        platform::errors::External(
+                            "Not support threshold [%f] in XPU", threshold));
+      PADDLE_ENFORCE_EQ(
+          scale, 6.0f,
+          platform::errors::External("Not support scale [%f] in XPU", scale));
+      PADDLE_ENFORCE_EQ(
+          offset, 3.0f,
+          platform::errors::External("Not support offset [%f] in XPU", offset));
+      r = xpu::hard_swish(xpu_context, reinterpret_cast<const float *>(x_data),
+                          reinterpret_cast<float *>(y_data), x->numel());
+      break;
+    }
+    case xpu::Activation_t::ACT_POW: {
+      type.pow_factor = ctx.Attr<float>("factor");
+    }
+    default: {
+      r = xpu::activation_forward(xpu_context, type, x->numel(),
+                                  reinterpret_cast<const float *>(x_data),
+                                  reinterpret_cast<float *>(y_data));
+      break;
+    }
+  }
   PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                     platform::errors::External(
                         "XPU API return wrong value[%d], please check whether "
@@ -90,12 +115,40 @@ void xpu_activation_backward(const framework::ExecutionContext &ctx,
   if (y != nullptr) y_data = y->data<T>();
   if (dOut != nullptr) y_grad = dOut->data<T>();
   T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
+  int r = 0;
   auto xpu_context = ctx.device_context<DeviceContext>().x_context();
-  int r = xpu::activation_backward(xpu_context, type, dX->numel(),
-                                   reinterpret_cast<const float *>(x_data),
-                                   reinterpret_cast<const float *>(y_data),
-                                   reinterpret_cast<const float *>(y_grad),
-                                   reinterpret_cast<float *>(x_grad));
+
+  switch (type.type) {
+    case xpu::Activation_t::HARD_SWISH: {
+      float threshold = ctx.Attr<float>("threshold");
+      float scale = ctx.Attr<float>("scale");
+      float offset = ctx.Attr<float>("offset");
+      PADDLE_ENFORCE_EQ(threshold, 6.0f,
+                        platform::errors::External(
+                            "Not support threshold [%f] in XPU", threshold));
+      PADDLE_ENFORCE_EQ(
+          scale, 6.0f,
+          platform::errors::External("Not support scale [%f] in XPU", scale));
+      PADDLE_ENFORCE_EQ(
+          offset, 3.0f,
+          platform::errors::External("Not support offset [%f] in XPU", offset));
+      r = xpu::hard_swish_grad(xpu_context,
+                               reinterpret_cast<const float *>(x_data),
+                               reinterpret_cast<const float *>(y_data),
+                               reinterpret_cast<const float *>(y_grad),
+                               reinterpret_cast<float *>(x_grad), dX->numel());
+      break;
+    }
+    default: {
+      r = xpu::activation_backward(xpu_context, type, dX->numel(),
+                                   reinterpret_cast<const float *>(x_data),
+                                   reinterpret_cast<const float *>(y_data),
+                                   reinterpret_cast<const float *>(y_grad),
+                                   reinterpret_cast<float *>(x_grad));
+      break;
+    }
+  }
   PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                     platform::errors::External(
                         "XPU API return wrong value[%d], please check whether "
@@ -132,6 +185,8 @@ using XPULogFunctor = XPUActivationFunc<T, xpu::Activation_t::LOG>;
 template <typename T>
 using XPUSquareFunctor = XPUActivationFunc<T, xpu::Activation_t::SQUARE>;
 template <typename T>
+using XPUHardSwishFunctor = XPUActivationFunc<T, xpu::Activation_t::HARD_SWISH>;
+template <typename T>
 using XPUSuareGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::SQUARE>;
 template <typename T>
 using XPUReluGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::RELU>;
@@ -147,6 +202,9 @@ using XPUSqrtFunctor = XPUActivationFunc<T, xpu::Activation_t::SQRT>;
 template <typename T>
 using XPUSqrtGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::SQRT>;
 template <typename T>
+using XPUHardSwishGradFunctor =
+    XPUActivationGradFunc<T, xpu::Activation_t::HARD_SWISH>;
+template <typename T>
 using XPUACTPowFunctor = XPUActivationFunc<T, xpu::Activation_t::ACT_POW>;
 template <typename T>
 using XPUABSFunctor = XPUActivationFunc<T, xpu::Activation_t::ABS>;
@@ -169,6 +227,8 @@ REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, XPUSigmoidFunctor,
 REGISTER_ACTIVATION_XPU_KERNEL(gelu, XPUGeluFunctor, XPUGeluGradFunctor)
 REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor)
 REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSuareGradFunctor)
+REGISTER_ACTIVATION_XPU_KERNEL(hard_swish, XPUHardSwishFunctor,
+                               XPUHardSwishGradFunctor)
 REGISTER_OP_XPU_KERNEL(log,
                        ops::XPUActivationKernel<ops::XPULogFunctor<float>>);
 REGISTER_OP_XPU_KERNEL(pow,
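With the functors registered, hard_swish can be dispatched to the XPU like the other activations. A hedged static-graph sketch of how the new kernel might be exercised end to end (fluid.layers.hard_swish, XPUPlace and Executor are existing Paddle APIs; the program itself is illustrative and assumes a build configured with WITH_XPU and a Kunlun card):

import numpy as np
import paddle
import paddle.fluid as fluid

paddle.enable_static()
main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    x = fluid.data(name="x", shape=[11, 17], dtype="float32")
    # Defaults threshold=6, scale=6, offset=3 match what the XPU kernel accepts.
    y = fluid.layers.hard_swish(x)

place = paddle.XPUPlace(0)
exe = fluid.Executor(place)
exe.run(startup_prog)
out, = exe.run(main_prog,
               feed={"x": np.random.uniform(-4, 4, [11, 17]).astype("float32")},
               fetch_list=[y])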
......
@@ -20,6 +20,7 @@ import unittest
 import numpy as np
 import paddle.fluid.core as core
 from op_test import OpTest
+from op_test_xpu import XPUOpTest
 from scipy.special import expit, erf
 import paddle
 import paddle.fluid as fluid
@@ -30,7 +31,7 @@ from paddle.fluid import compiler, Program, program_guard
 @unittest.skipIf(not paddle.is_compiled_with_xpu(),
                  "core is not compiled with XPU")
-class TestXPUActivation(OpTest):
+class TestXPUActivation(XPUOpTest):
     def setUp(self):
         self.op_type = "exp"
         self.init_dtype()
@@ -166,6 +167,33 @@ def gelu(x, approximate):
     return y_ref.astype(x.dtype)
 
 
+@unittest.skipIf(not paddle.is_compiled_with_xpu(),
+                 "core is not compiled with XPU")
+class TestXPUHardSwish(TestXPUActivation):
+    def setUp(self):
+        self.op_type = "hard_swish"
+        self.init_dtype()
+        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
+        offset = 3.0
+        threshold = 6.0
+        scale = 6.0
+        out = hard_swish(x, offset, threshold, scale)
+
+        self.inputs = {'X': x}
+        self.outputs = {'Out': out}
+        self.attrs = {'use_xpu': True}
+
+    def test_check_grad(self):
+        if paddle.is_compiled_with_xpu():
+            place = paddle.XPUPlace(0)
+            self.check_grad_with_place(place, ['X'], 'Out')
+
+
+def hard_swish(x, offset, threshold, scale):
+    y_ref = np.minimum(threshold, np.maximum(0, x + offset)) * x / scale
+    return y_ref.astype(x.dtype)
+
+
 @unittest.skipIf(not paddle.is_compiled_with_xpu(),
                  "core is not compiled with XPU")
 class TestXPULog(TestXPUActivation):
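check_grad_with_place compares the kernel's gradient against a numerically estimated one; a standalone sketch of the same idea, using central differences over the test's reference hard_swish (pure NumPy, illustrative only):

import numpy as np

def numeric_grad(f, x, eps=5e-3):
    # Central-difference estimate of d sum(f(x)) / dx, one element at a time.
    grad = np.zeros_like(x)
    for idx in np.ndindex(x.shape):
        orig = x[idx]
        x[idx] = orig + eps
        plus = f(x).sum()
        x[idx] = orig - eps
        minus = f(x).sum()
        x[idx] = orig
        grad[idx] = (plus - minus) / (2.0 * eps)
    return grad

x = np.random.uniform(-4, 4, [3, 4]).astype("float64")
hs = lambda t: np.minimum(6.0, np.maximum(0.0, t + 3.0)) * t / 6.0
est = numeric_grad(hs, x)
# est should agree with the piecewise gradient sketched after the backward hunk.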
......