Unverified commit 123538d7, authored by ysh329, committed by GitHub

[KERNEL][OPENCL] Add hard sigmoid opencl kernel. test=develop (#4177)

* [OPENCL] Add hard sigmoid opencl kernel. test=develop

* optimize hard sigmoid. test=develop
Parent 9bf15fef
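For context, hard sigmoid is the piecewise-linear approximation of the sigmoid, y = clamp(slope * x + offset, 0, 1), applied elementwise; the OpenCL kernel added below receives slope and offset as runtime arguments. A minimal host-side sketch of that formula (the function name hard_sigmoid_ref and its signature are illustrative, not part of this commit):

#include <algorithm>
#include <cstddef>

// y[i] = clamp(slope * x[i] + offset, 0, 1) for every element.
void hard_sigmoid_ref(const float *x, float *y, std::size_t n,
                      float slope, float offset) {
  for (std::size_t i = 0; i < n; ++i) {
    y[i] = std::min(1.0f, std::max(0.0f, slope * x[i] + offset));
  }
}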
......@@ -55,8 +55,12 @@ const std::string& ActivationTypeToStr(ActivationType act) {
"Tanh",
"Swish",
"Exp",
"Abs",
"HardSwish",
"Reciprocal",
"ThresholdedRelu",
"Elu"};
"Elu",
"HardSigmoid"};
auto x = static_cast<int>(act);
CHECK_LT(x, static_cast<int>(ActivationType::NUM));
return act2string[x];
......
......@@ -109,7 +109,8 @@ enum class ActivationType : int {
kReciprocal = 11,
kThresholdedRelu = 12,
kElu = 13,
NUM = 14,
kHardSigmoid = 14,
NUM = 15,
};
static size_t PrecisionTypeLength(PrecisionType type) {
......
......@@ -66,6 +66,22 @@ __kernel void sigmoid(__read_only image2d_t input,
WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), out);
}
__kernel void hard_sigmoid(__read_only image2d_t input,
__write_only image2d_t output,
__private const float value_offset,
__private const float scale) {
const int x = get_global_id(0); // image_width
const int y = get_global_id(1); // image_height
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
CL_DTYPE4 in = READ_IMG_TYPE(CL_DTYPE_CHAR, input, sampler, (int2)(x, y));
// out = clamp(scale * x + offset, 0, 1); cast the float arguments to the image
// element type so the expression is valid for both FP16 and FP32 builds.
CL_DTYPE4 out = clamp(in * (CL_DTYPE4)(scale) + (CL_DTYPE4)(value_offset),
                      (CL_DTYPE4)(0.0f), (CL_DTYPE4)(1.0f));
WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), out);
}
__kernel void leaky_relu(__read_only image2d_t input,
__write_only image2d_t output,
__private const float threshold,
......
......@@ -72,7 +72,12 @@ class ActivationComputeImageDefault
case 8:
kernel_func_name_ = "exp_act";
break;
default:
case 14:
kernel_func_name_ = "hard_sigmoid";
scale_ = act_param_->hard_sigmoid_slope;
threshold_ = act_param_->hard_sigmoid_offset;
break;
default:
LOG(FATAL) << "This act type: " << act_type << " is not supported.";
return;
}
......@@ -316,3 +321,21 @@ REGISTER_LITE_KERNEL(
PRECISION(kFP16),
DATALAYOUT(kImageDefault))})
.Finalize();
// Hard Sigmoid
REGISTER_LITE_KERNEL(
hard_sigmoid,
kOpenCL,
kFP16,
kImageDefault,
paddle::lite::kernels::opencl::ActivationComputeImageDefault,
ImageDefault)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kOpenCL),
PRECISION(kFP16),
DATALAYOUT(kImageDefault))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kOpenCL),
PRECISION(kFP16),
DATALAYOUT(kImageDefault))})
.Finalize();
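Targets that link the OpenCL kernels statically pull this new registration in with the matching USE_LITE_KERNEL declaration, exactly as the updated unit test does further below:

USE_LITE_KERNEL(hard_sigmoid, kOpenCL, kFP16, kImageDefault, ImageDefault);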
......@@ -57,6 +57,15 @@ void act_compute_ref(const dtype *x_data,
case 8: // exp
out_data[i] = expf(x_data[i]);
break;
case 14: // hard sigmoid
// for hard_sigmoid, "scale" acts as the slope and "threshold" as the offset
{
float tmp = x_data[i] * scale + threshold;
tmp = tmp < 1.0f ? tmp : 1.0f;
tmp = tmp > 0.0f ? tmp : 0.0f;
out_data[i] = tmp;
break;
}
default:
break;
}
......@@ -75,7 +84,7 @@ TEST(act_image2d_fp16, compute) {
for (auto c : {1, 3, 8, 23, 32}) {
for (int h = 12; h <= 100; h += 13) {
for (int w = 12; w <= 100; w += 25) {
for (auto act_type : {1, 2, 4, 5, 6, 7, 8}) {
for (auto act_type : {1, 2, 4, 5, 6, 7, 8, 14}) {
for (auto scale : {0.5, 0.8}) {
for (auto threshold : {6.0}) {
#else
......@@ -83,9 +92,9 @@ TEST(act_image2d_fp16, compute) {
const int c = 2;
const int h = 3;
const int w = 4;
const int act_type = 4;
const float scale = 0.5f;
const float threshold = 6.f;
const int act_type = 14;
const float scale = 2.0f;
const float threshold = 1.0f;
#endif // ACT_FP16_LOOP_TEST
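With these non-loop constants (act_type 14, slope = scale = 2.0, offset = threshold = 1.0), the reference result for an illustrative input of x = -0.3, which occurs in the deterministic ramp written into mapped_x below, works out to:

hard_sigmoid(-0.3) = clamp(2.0 * (-0.3) + 1.0, 0, 1) = clamp(0.4, 0, 1) = 0.4
hard_sigmoid( 0.3) = clamp(2.0 * ( 0.3) + 1.0, 0, 1) = clamp(1.6, 0, 1) = 1.0  (saturated)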
......@@ -117,6 +126,9 @@ TEST(act_image2d_fp16, compute) {
case 8:  // exp
func_name = "exp";
break;
case 14: // hard sigmoid
func_name = "hard_sigmoid";
break;
}
LOG(INFO) << "func_name: " << func_name;
// set layout kernels
......@@ -166,6 +178,9 @@ TEST(act_image2d_fp16, compute) {
actParam.Relu_clipped_coef = threshold;
actParam.Leaky_relu_alpha = scale;
actParam.Swish_beta = scale;
// hard sigmoid
actParam.hard_sigmoid_slope = scale;
actParam.hard_sigmoid_offset = threshold;
const DDim x_dim =
DDim(std::vector<DDim::value_type>{n, c, h, w});
......@@ -191,7 +206,8 @@ TEST(act_image2d_fp16, compute) {
std::default_random_engine engine;
std::uniform_real_distribution<float> dist(-1, 1);
for (int i = 0; i < x_dim.production(); ++i) {
mapped_x[i] = dist(engine);
mapped_x[i] =
(i - x_dim.production() / 2) / 10.; // dist(engine);
mapped_y[i] = 0.0f;
}
auto *act_in_data = act_in.mutable_data<half_t, cl::Image2D>(
......@@ -316,3 +332,6 @@ USE_LITE_KERNEL(relu6, kOpenCL, kFP16, kImageDefault, ImageDefault);
// sigmoid image2d fp16
USE_LITE_KERNEL(sigmoid, kOpenCL, kFP16, kImageDefault, ImageDefault);
// hard_sigmoid image2d fp16
USE_LITE_KERNEL(hard_sigmoid, kOpenCL, kFP16, kImageDefault, ImageDefault);
......@@ -60,6 +60,7 @@ bool ActivationOp::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
param_.active_type = lite_api::ActivationType::kSwish;
} else if (opdesc.Type() == "hard_sigmoid") {
// hard_sigmoid
param_.active_type = lite_api::ActivationType::kHardSigmoid;
param_.hard_sigmoid_slope = opdesc.GetAttr<float>("slope");
param_.hard_sigmoid_offset = opdesc.GetAttr<float>("offset");
} else if (opdesc.Type() == "sigmoid") {
......