Unverified commit 123538d7, authored by ysh329, committed by GitHub

[KERNEL][OPENCL] Add hard sigmoid opencl kernel. test=develop (#4177)

* [OPENCL] Add hard sigmoid opencl kernel. test=develop

* optimize hard sigmoid. test=develop
Parent 9bf15fef
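
Note: the new kernel computes the standard hard sigmoid, out = clamp(slope * x + offset, 0, 1); the op attribute slope is passed to the kernel as scale and offset as value_offset. A minimal scalar sketch of the same computation (hard_sigmoid_ref is an illustrative name, not part of this patch):

#include <algorithm>

// Scalar equivalent of what the OpenCL kernel below does per element.
inline float hard_sigmoid_ref(float x, float slope, float offset) {
  return std::min(std::max(x * slope + offset, 0.0f), 1.0f);
}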
...@@ -55,8 +55,12 @@ const std::string& ActivationTypeToStr(ActivationType act) {
                                          "Tanh",
                                          "Swish",
                                          "Exp",
+                                         "Abs",
+                                         "HardSwish",
+                                         "Reciprocal",
                                          "ThresholdedRelu",
-                                         "Elu"};
+                                         "Elu",
+                                         "HardSigmoid"};
   auto x = static_cast<int>(act);
   CHECK_LT(x, static_cast<int>(ActivationType::NUM));
   return act2string[x];
......
...@@ -109,7 +109,8 @@ enum class ActivationType : int {
   kReciprocal = 11,
   kThresholdedRelu = 12,
   kElu = 13,
-  NUM = 14,
+  kHardSigmoid = 14,
+  NUM = 15,
 };
 
 static size_t PrecisionTypeLength(PrecisionType type) {
......
...@@ -66,6 +66,22 @@ __kernel void sigmoid(__read_only image2d_t input,
   WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), out);
 }
 
+__kernel void hard_sigmoid(__read_only image2d_t input,
+                           __write_only image2d_t output,
+                           __private const float value_offset,
+                           __private const float scale) {
+  const int x = get_global_id(0);  // image_width
+  const int y = get_global_id(1);  // image_height
+
+  const sampler_t sampler =
+      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
+
+  CL_DTYPE4 in = READ_IMG_TYPE(CL_DTYPE_CHAR, input, sampler, (int2)(x, y));
+  CL_DTYPE4 out = clamp(in * scale + value_offset, 0.0, 1.0);
+  WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), out);
+}
+
 __kernel void leaky_relu(__read_only image2d_t input,
                          __write_only image2d_t output,
                          __private const float threshold,
......
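The kernel above indexes the output image by work-item id, so the host launches one work item per pixel of the folded image. As a hedged sketch only (assuming the usual NCHW-to-image2d folding of the ImageDefault layout, where four channels pack into one pixel; GlobalWorkSize is an illustrative helper, not from this patch):

#include <cstddef>

struct WorkSize {
  size_t image_width;   // range of get_global_id(0)
  size_t image_height;  // range of get_global_id(1)
};

// Assumed folding: image_width = ceil(C / 4) * W, image_height = N * H.
inline WorkSize GlobalWorkSize(int n, int c, int h, int w) {
  WorkSize ws;
  ws.image_width = static_cast<size_t>((c + 3) / 4) * w;
  ws.image_height = static_cast<size_t>(n) * h;
  return ws;
}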
...@@ -72,7 +72,12 @@ class ActivationComputeImageDefault
     case 8:
       kernel_func_name_ = "exp_act";
       break;
+    case 14:
+      kernel_func_name_ = "hard_sigmoid";
+      scale_ = act_param_->hard_sigmoid_slope;
+      threshold_ = act_param_->hard_sigmoid_offset;
+      break;
     default:
       LOG(FATAL) << "This act type:" << act_type << " doesn't support.";
       return;
   }
...@@ -316,3 +321,21 @@ REGISTER_LITE_KERNEL(
                        PRECISION(kFP16),
                        DATALAYOUT(kImageDefault))})
     .Finalize();
+
+// Hard Sigmoid
+REGISTER_LITE_KERNEL(
+    hard_sigmoid,
+    kOpenCL,
+    kFP16,
+    kImageDefault,
+    paddle::lite::kernels::opencl::ActivationComputeImageDefault,
+    ImageDefault)
+    .BindInput("X",
+               {LiteType::GetTensorTy(TARGET(kOpenCL),
+                                      PRECISION(kFP16),
+                                      DATALAYOUT(kImageDefault))})
+    .BindOutput("Out",
+                {LiteType::GetTensorTy(TARGET(kOpenCL),
+                                       PRECISION(kFP16),
+                                       DATALAYOUT(kImageDefault))})
+    .Finalize();
...@@ -57,6 +57,15 @@ void act_compute_ref(const dtype *x_data,
     case 8:  // exp
       out_data[i] = expf(x_data[i]);
       break;
+    case 14:  // hard sigmoid
+      // scale ==> slope, threshold ==> offset
+      {
+        float tmp = x_data[i] * scale + threshold;
+        tmp = tmp < 1.0f ? tmp : 1.0f;
+        tmp = tmp > 0.0f ? tmp : 0.0f;
+        out_data[i] = tmp;
+        break;
+      }
     default:
       break;
   }
...@@ -75,7 +84,7 @@ TEST(act_image2d_fp16, compute) {
   for (auto c : {1, 3, 8, 23, 32}) {
     for (int h = 12; h <= 100; h += 13) {
       for (int w = 12; w <= 100; w += 25) {
-        for (auto act_type : {1, 2, 4, 5, 6, 7, 8}) {
+        for (auto act_type : {1, 2, 4, 5, 6, 7, 8, 14}) {
           for (auto scale : {0.5, 0.8}) {
             for (auto threshold : {6.0}) {
 #else
...@@ -83,9 +92,9 @@ TEST(act_image2d_fp16, compute) {
   const int c = 2;
   const int h = 3;
   const int w = 4;
-  const int act_type = 4;
-  const float scale = 0.5f;
-  const float threshold = 6.f;
+  const int act_type = 14;
+  const float scale = 2.0f;
+  const float threshold = 1.0f;
 #endif  // ACT_FP16_LOOP_TEST
...@@ -117,6 +126,9 @@ TEST(act_image2d_fp16, compute) {
     case 8:  // exp
       func_name = "exp";
       break;
+    case 14:  // hard sigmoid
+      func_name = "hard_sigmoid";
+      break;
   }
   LOG(INFO) << "func_name: " << func_name;
   // set layout kernels
...@@ -166,6 +178,9 @@ TEST(act_image2d_fp16, compute) {
           actParam.Relu_clipped_coef = threshold;
           actParam.Leaky_relu_alpha = scale;
           actParam.Swish_beta = scale;
+          // hard sigmoid
+          actParam.hard_sigmoid_slope = scale;
+          actParam.hard_sigmoid_offset = threshold;
 
           const DDim x_dim =
               DDim(std::vector<DDim::value_type>{n, c, h, w});
...@@ -191,7 +206,8 @@ TEST(act_image2d_fp16, compute) {
           std::default_random_engine engine;
           std::uniform_real_distribution<float> dist(-1, 1);
           for (int i = 0; i < x_dim.production(); ++i) {
-            mapped_x[i] = dist(engine);
+            mapped_x[i] =
+                (i - x_dim.production() / 2) / 10.;  // dist(engine);
             mapped_y[i] = 0.0f;
           }
           auto *act_in_data = act_in.mutable_data<half_t, cl::Image2D>(
...@@ -316,3 +332,6 @@ USE_LITE_KERNEL(relu6, kOpenCL, kFP16, kImageDefault, ImageDefault);
 
 // sigmoid image2d fp16
 USE_LITE_KERNEL(sigmoid, kOpenCL, kFP16, kImageDefault, ImageDefault);
+
+// hard_sigmoid image2d fp16
+USE_LITE_KERNEL(hard_sigmoid, kOpenCL, kFP16, kImageDefault, ImageDefault);
...@@ -60,6 +60,7 @@ bool ActivationOp::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
     param_.active_type = lite_api::ActivationType::kSwish;
   } else if (opdesc.Type() == "hard_sigmoid") {
     // hard_sigmoid
+    param_.active_type = lite_api::ActivationType::kHardSigmoid;
    param_.hard_sigmoid_slope = opdesc.GetAttr<float>("slope");
    param_.hard_sigmoid_offset = opdesc.GetAttr<float>("offset");
   } else if (opdesc.Type() == "sigmoid") {
......
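
For background (not changed by this patch): Paddle's hard_sigmoid op commonly defaults to slope = 0.2 and offset = 0.5, in which case the activation evaluates clamp(0.2 * x + 0.5, 0, 1), a piecewise-linear approximation of the sigmoid that saturates below x = -2.5 and above x = 2.5. A quick self-contained check of that behaviour (the defaults are stated here as an assumption for illustration):

#include <algorithm>
#include <cassert>
#include <cmath>

int main() {
  const float slope = 0.2f, offset = 0.5f;  // assumed common defaults
  auto hs = [&](float x) {
    return std::min(std::max(x * slope + offset, 0.0f), 1.0f);
  };
  assert(std::fabs(hs(0.0f) - 0.5f) < 1e-6f);  // midpoint maps to 0.5
  assert(hs(3.0f) == 1.0f);                    // saturates high for x >= 2.5
  assert(hs(-3.0f) == 0.0f);                   // saturates low for x <= -2.5
  return 0;
}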