Unverified commit 9e361a4d, authored by Y yiicy, committed by GitHub

[ARM] int8 direct_conv, dw_conv add relu6 and leaky relu fusion, test=develop (#3737)

int8 direct_conv, dw_conv add relu6 and leaky relu fusion
Parent cba42f0d
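
The kernels' old `bool flag_relu` argument is replaced by an `int flag_act` plus a four-lane `float* alpha`, so relu6 and leaky relu can be fused the same way relu already was. A minimal stand-alone sketch of that encoding, using local stand-in types for `operators::ActivationParam` / `lite_api::ActivationType` (the real code reads those directly, as the hunks below show):

// Stand-in types; the production code reads operators::ActivationParam.
enum class Act { kNone, kRelu, kRelu6, kLeakyRelu };

struct ActParam {
  bool has_active{false};
  Act active_type{Act::kNone};
  float Relu_clipped_coef{6.f};  // relu6 clip threshold
  float Leaky_relu_alpha{0.f};   // leaky-relu slope
};

// Returns flag_act (0: none, 1: relu, 2: relu6, 3: leaky relu) and broadcasts
// the coefficient into alpha[0..3] so the NEON kernels can load it as one
// 128-bit vector.
inline int encode_act(const ActParam& p, float alpha[4]) {
  int flag_act = 0;
  float coef = 0.f;
  if (p.has_active) {
    if (p.active_type == Act::kRelu) {
      flag_act = 1;
    } else if (p.active_type == Act::kRelu6) {
      flag_act = 2;
      coef = p.Relu_clipped_coef;
    } else if (p.active_type == Act::kLeakyRelu) {
      flag_act = 3;
      coef = p.Leaky_relu_alpha;
    }
  }
  for (int i = 0; i < 4; ++i) alpha[i] = coef;
  return flag_act;
}

For relu6 the alpha lanes carry the clip threshold, for leaky relu the negative-slope coefficient; plain relu and "no activation" leave alpha at zero.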
@@ -36,7 +36,8 @@ void conv_depthwise_3x3s1_int8(Dtype* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
@@ -434,7 +435,8 @@ void conv_depthwise_3x3s1_int8(Dtype* dout,
 chout,
 hout,
 wout,
-flag_relu,
+flag_act,
+alpha,
 bias_local,
 flag_bias,
 ptr_write,
@@ -450,7 +452,8 @@ template void conv_depthwise_3x3s1_int8<int8_t>(int8_t* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
@@ -467,7 +470,8 @@ template void conv_depthwise_3x3s1_int8<float>(float* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
......
@@ -42,8 +42,30 @@ void conv_3x3s1_direct_int8(const int8_t* din,
 Context<TARGET(kARM)>* ctx,
 const float* scale) {
 auto paddings = *param.paddings;
-bool flag_relu = param.fuse_relu;
 bool flag_bias = param.bias;
+auto act_param = param.activation_param;
+auto act_type = act_param.active_type;
+int flag_act = 0; // relu: 1, relu6: 2, leakey: 3
+float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+if (act_param.has_active) {
+  if (act_type == lite_api::ActivationType::kRelu) {
+    flag_act = 1;
+  } else if (act_type == lite_api::ActivationType::kRelu6) {
+    flag_act = 2;
+    float local_alpha = act_param.Relu_clipped_coef;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+    flag_act = 3;
+    float local_alpha = act_param.Leaky_relu_alpha;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  }
+}
 int pad_h = paddings[0];
 int pad_w = paddings[2];
@@ -442,7 +464,8 @@ void conv_3x3s1_direct_int8(const int8_t* din,
 chout,
 hout,
 wout,
-flag_relu,
+flag_act,
+alpha,
 bias_local,
 flag_bias,
 ptr_write,
......
@@ -36,7 +36,8 @@ void conv_depthwise_3x3s2_int8(Dtype* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
@@ -447,7 +448,8 @@ void conv_depthwise_3x3s2_int8(Dtype* dout,
 chout,
 hout,
 wout,
-flag_relu,
+flag_act,
+alpha,
 bias_local,
 flag_bias,
 ptr_write,
@@ -463,7 +465,8 @@ template void conv_depthwise_3x3s2_int8<int8_t>(int8_t* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
@@ -480,7 +483,8 @@ template void conv_depthwise_3x3s2_int8<float>(float* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
......
@@ -47,8 +47,30 @@ void conv_3x3s2_direct_int8(const int8_t* din,
 //! prepack input to tmp buffer
 //! write output to tmp buffer
 auto paddings = *param.paddings;
-bool flag_relu = param.fuse_relu;
 bool flag_bias = param.bias;
+auto act_param = param.activation_param;
+auto act_type = act_param.active_type;
+int flag_act = 0; // relu: 1, relu6: 2, leakey: 3
+float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+if (act_param.has_active) {
+  if (act_type == lite_api::ActivationType::kRelu) {
+    flag_act = 1;
+  } else if (act_type == lite_api::ActivationType::kRelu6) {
+    flag_act = 2;
+    float local_alpha = act_param.Relu_clipped_coef;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+    flag_act = 3;
+    float local_alpha = act_param.Leaky_relu_alpha;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  }
+}
 int pad_h = paddings[0];
 int pad_w = paddings[2];
@@ -442,7 +464,8 @@ void conv_3x3s2_direct_int8(const int8_t* din,
 chout,
 hout,
 wout,
-flag_relu,
+flag_act,
+alpha,
 bias_local,
 flag_bias,
 ptr_write,
@@ -474,8 +497,30 @@ void conv_3x3s2_direct_int8(const int8_t* din,
 //! prepack input to tmp buffer
 //! write output to tmp buffer
 auto paddings = *param.paddings;
-bool flag_relu = param.fuse_relu;
 bool flag_bias = param.bias;
+auto act_param = param.activation_param;
+auto act_type = act_param.active_type;
+int flag_act = 0; // relu: 1, relu6: 2, leakey: 3
+float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+if (act_param.has_active) {
+  if (act_type == lite_api::ActivationType::kRelu) {
+    flag_act = 1;
+  } else if (act_type == lite_api::ActivationType::kRelu6) {
+    flag_act = 2;
+    float local_alpha = act_param.Relu_clipped_coef;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+    flag_act = 3;
+    float local_alpha = act_param.Leaky_relu_alpha;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  }
+}
 int pad_h = paddings[0];
 int pad_w = paddings[2];
 const int threads = ctx->threads();
@@ -698,7 +743,8 @@ void conv_3x3s2_direct_int8(const int8_t* din,
 chout,
 hout,
 wout,
-flag_relu,
+flag_act,
+alpha,
 bias_local,
 flag_bias,
 ptr_write,
......
@@ -36,7 +36,8 @@ void conv_depthwise_5x5s1_int8(Dtype* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
@@ -726,7 +727,8 @@ void conv_depthwise_5x5s1_int8(Dtype* dout,
 chout,
 hout,
 wout,
-flag_relu,
+flag_act,
+alpha,
 bias_local,
 flag_bias,
 ptr_write,
@@ -742,7 +744,8 @@ template void conv_depthwise_5x5s1_int8<int8_t>(int8_t* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
@@ -759,7 +762,8 @@ template void conv_depthwise_5x5s1_int8<float>(float* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
......
@@ -36,7 +36,8 @@ void conv_depthwise_5x5s2_int8(Dtype* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
@@ -746,7 +747,8 @@ void conv_depthwise_5x5s2_int8(Dtype* dout,
 chout,
 hout,
 wout,
-flag_relu,
+flag_act,
+alpha,
 bias_local,
 flag_bias,
 ptr_write,
@@ -762,7 +764,8 @@ template void conv_depthwise_5x5s2_int8<int8_t>(int8_t* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
@@ -779,7 +782,8 @@ template void conv_depthwise_5x5s2_int8<float>(float* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
......
@@ -94,7 +94,8 @@ void conv_depthwise_3x3s1_int8(Dtype* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
@@ -112,7 +113,8 @@ void conv_depthwise_3x3s2_int8(Dtype* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
@@ -178,7 +180,8 @@ void conv_depthwise_5x5s1_int8(Dtype* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
@@ -196,7 +199,8 @@ void conv_depthwise_5x5s2_int8(Dtype* dout,
 const float* scale,
 const float* bias,
 bool flag_bias,
-bool flag_relu,
+int flag_act,
+float* alpha,
 int num,
 int chin,
 int hin,
......
@@ -790,8 +790,30 @@ void conv_depthwise_3x3_int8_fp32(const void* din,
 int pad_h = paddings[0];
 int pad_w = paddings[2];
 int stride = param.strides[1];
-bool flag_relu = param.fuse_relu;
 bool flag_bias = param.bias != nullptr;
+auto act_param = param.activation_param;
+auto act_type = act_param.active_type;
+int flag_act = 0; // relu: 1, relu6: 2, leakey: 3
+float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+if (act_param.has_active) {
+  if (act_type == lite_api::ActivationType::kRelu) {
+    flag_act = 1;
+  } else if (act_type == lite_api::ActivationType::kRelu6) {
+    flag_act = 2;
+    float local_alpha = act_param.Relu_clipped_coef;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+    flag_act = 3;
+    float local_alpha = act_param.Leaky_relu_alpha;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  }
+}
 if (stride == 1) {
 conv_depthwise_3x3s1_int8(reinterpret_cast<float*>(dout),
 reinterpret_cast<const int8_t*>(din),
@@ -799,7 +821,8 @@ void conv_depthwise_3x3_int8_fp32(const void* din,
 scale,
 bias,
 flag_bias,
-flag_relu,
+flag_act,
+alpha,
 num,
 ch_in,
 h_in,
@@ -816,7 +839,8 @@ void conv_depthwise_3x3_int8_fp32(const void* din,
 scale,
 bias,
 flag_bias,
-flag_relu,
+flag_act,
+alpha,
 num,
 ch_in,
 h_in,
@@ -849,8 +873,30 @@ void conv_depthwise_3x3_int8_int8(const void* din,
 int pad_h = paddings[0];
 int pad_w = paddings[2];
 int stride = param.strides[1];
-bool flag_relu = param.fuse_relu;
 bool flag_bias = param.bias != nullptr;
+auto act_param = param.activation_param;
+auto act_type = act_param.active_type;
+int flag_act = 0; // relu: 1, relu6: 2, leakey: 3
+float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+if (act_param.has_active) {
+  if (act_type == lite_api::ActivationType::kRelu) {
+    flag_act = 1;
+  } else if (act_type == lite_api::ActivationType::kRelu6) {
+    flag_act = 2;
+    float local_alpha = act_param.Relu_clipped_coef;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+    flag_act = 3;
+    float local_alpha = act_param.Leaky_relu_alpha;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  }
+}
 if (stride == 1) {
 conv_depthwise_3x3s1_int8(reinterpret_cast<int8_t*>(dout),
 reinterpret_cast<const int8_t*>(din),
@@ -858,7 +904,8 @@ void conv_depthwise_3x3_int8_int8(const void* din,
 scale,
 bias,
 flag_bias,
-flag_relu,
+flag_act,
+alpha,
 num,
 ch_in,
 h_in,
@@ -875,7 +922,8 @@ void conv_depthwise_3x3_int8_int8(const void* din,
 scale,
 bias,
 flag_bias,
-flag_relu,
+flag_act,
+alpha,
 num,
 ch_in,
 h_in,
@@ -908,8 +956,30 @@ void conv_depthwise_5x5_int8_fp32(const void* din,
 int pad_h = paddings[0];
 int pad_w = paddings[2];
 int stride = param.strides[1];
-bool flag_relu = param.fuse_relu;
 bool flag_bias = param.bias != nullptr;
+auto act_param = param.activation_param;
+auto act_type = act_param.active_type;
+int flag_act = 0; // relu: 1, relu6: 2, leakey: 3
+float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+if (act_param.has_active) {
+  if (act_type == lite_api::ActivationType::kRelu) {
+    flag_act = 1;
+  } else if (act_type == lite_api::ActivationType::kRelu6) {
+    flag_act = 2;
+    float local_alpha = act_param.Relu_clipped_coef;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+    flag_act = 3;
+    float local_alpha = act_param.Leaky_relu_alpha;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  }
+}
 if (stride == 1) {
 conv_depthwise_5x5s1_int8(reinterpret_cast<float*>(dout),
 reinterpret_cast<const int8_t*>(din),
@@ -917,7 +987,8 @@ void conv_depthwise_5x5_int8_fp32(const void* din,
 scale,
 bias,
 flag_bias,
-flag_relu,
+flag_act,
+alpha,
 num,
 ch_in,
 h_in,
@@ -934,7 +1005,8 @@ void conv_depthwise_5x5_int8_fp32(const void* din,
 scale,
 bias,
 flag_bias,
-flag_relu,
+flag_act,
+alpha,
 num,
 ch_in,
 h_in,
@@ -967,8 +1039,30 @@ void conv_depthwise_5x5_int8_int8(const void* din,
 int pad_h = paddings[0];
 int pad_w = paddings[2];
 int stride = param.strides[1];
-bool flag_relu = param.fuse_relu;
 bool flag_bias = param.bias != nullptr;
+auto act_param = param.activation_param;
+auto act_type = act_param.active_type;
+int flag_act = 0; // relu: 1, relu6: 2, leakey: 3
+float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+if (act_param.has_active) {
+  if (act_type == lite_api::ActivationType::kRelu) {
+    flag_act = 1;
+  } else if (act_type == lite_api::ActivationType::kRelu6) {
+    flag_act = 2;
+    float local_alpha = act_param.Relu_clipped_coef;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+    flag_act = 3;
+    float local_alpha = act_param.Leaky_relu_alpha;
+    alpha[0] = local_alpha;
+    alpha[1] = local_alpha;
+    alpha[2] = local_alpha;
+    alpha[3] = local_alpha;
+  }
+}
 if (stride == 1) {
 conv_depthwise_5x5s1_int8(reinterpret_cast<int8_t*>(dout),
 reinterpret_cast<const int8_t*>(din),
@@ -976,7 +1070,8 @@ void conv_depthwise_5x5_int8_int8(const void* din,
 scale,
 bias,
 flag_bias,
-flag_relu,
+flag_act,
+alpha,
 num,
 ch_in,
 h_in,
@@ -993,7 +1088,8 @@ void conv_depthwise_5x5_int8_int8(const void* din,
 scale,
 bias,
 flag_bias,
-flag_relu,
+flag_act,
+alpha,
 num,
 ch_in,
 h_in,
......
@@ -534,18 +534,18 @@ inline void gemm_int8_kernel(const int8_t* a_ptr,
 "fmin v17.4s, v17.4s, v1.4s\n" /* relu6 */ \
 "fmin v18.4s, v18.4s, v1.4s\n" /* relu6 */ \
 "fmin v19.4s, v19.4s, v1.4s\n" /* relu6 */ \
-"fmin v20.4s, v20.4s, v0.4s\n" /* relu6 */ \
-"fmin v21.4s, v21.4s, v0.4s\n" /* relu6 */ \
-"fmin v22.4s, v22.4s, v0.4s\n" /* relu6 */ \
-"fmin v23.4s, v23.4s, v0.4s\n" /* relu6 */ \
-"fmin v24.4s, v24.4s, v0.4s\n" /* relu6 */ \
-"fmin v25.4s, v25.4s, v0.4s\n" /* relu6 */ \
-"fmin v26.4s, v26.4s, v0.4s\n" /* relu6 */ \
-"fmin v27.4s, v27.4s, v0.4s\n" /* relu6 */ \
-"fmin v28.4s, v28.4s, v0.4s\n" /* relu6 */ \
-"fmin v29.4s, v29.4s, v0.4s\n" /* relu6 */ \
-"fmin v30.4s, v30.4s, v0.4s\n" /* relu6 */ \
-"fmin v31.4s, v31.4s, v0.4s\n" /* relu6 */ \
+"fmin v20.4s, v20.4s, v1.4s\n" /* relu6 */ \
+"fmin v21.4s, v21.4s, v1.4s\n" /* relu6 */ \
+"fmin v22.4s, v22.4s, v1.4s\n" /* relu6 */ \
+"fmin v23.4s, v23.4s, v1.4s\n" /* relu6 */ \
+"fmin v24.4s, v24.4s, v1.4s\n" /* relu6 */ \
+"fmin v25.4s, v25.4s, v1.4s\n" /* relu6 */ \
+"fmin v26.4s, v26.4s, v1.4s\n" /* relu6 */ \
+"fmin v27.4s, v27.4s, v1.4s\n" /* relu6 */ \
+"fmin v28.4s, v28.4s, v1.4s\n" /* relu6 */ \
+"fmin v29.4s, v29.4s, v1.4s\n" /* relu6 */ \
+"fmin v30.4s, v30.4s, v1.4s\n" /* relu6 */ \
+"fmin v31.4s, v31.4s, v1.4s\n" /* relu6 */ \
 "b 9f \n" /* relu end */
 #define GEMM_INT8_LEAKY_RELU \
......
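
The GEMM_INT8_RELU6 block above clamps each fp32 accumulator register against the broadcast relu6 threshold held in v1. A scalar sketch of the fused activations this epilogue applies (illustrative only, not the production NEON path; flag_act follows the kernel convention 1/2/3):

#include <algorithm>

// Scalar reference for the fused activation applied to n fp32 accumulators.
inline void apply_act(float* acc, int n, int flag_act, const float alpha[4]) {
  for (int i = 0; i < n; ++i) {
    float v = acc[i];
    if (flag_act == 1) {         // relu
      v = std::max(v, 0.f);
    } else if (flag_act == 2) {  // relu6: clamp to [0, alpha[0]]
      v = std::min(std::max(v, 0.f), alpha[0]);
    } else if (flag_act == 3) {  // leaky relu
      v = v > 0.f ? v : v * alpha[0];
    }
    acc[i] = v;
  }
}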
@@ -169,6 +169,12 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
 }
 flag_trans_bias_ = true;
 }
+//! update relu6 parameter
+if (param.activation_param.has_active &&
+    param.activation_param.active_type == lite_api::ActivationType::kRelu6) {
+  param.activation_param.Relu_clipped_coef =
+      param.activation_param.Relu_clipped_coef / param.output_scale;
+}
 /// select dw conv kernel
 if (kw == 3) {
 // trans weights
......
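
The relu6 coefficient update above applies only to the int8-output depthwise kernel. A sketch of the apparent rationale, under the assumption that these kernels fold 1/output_scale into the merged per-channel scale, so the activation runs on values already expressed in output-quantization units:

// Assumption: the int8-output path scales accumulators by
// w_scale * in_scale / output_scale before the activation, so the relu6
// threshold must be rescaled into the same domain.
inline float rescale_relu6_coef(float relu_clipped_coef, float output_scale) {
  return relu_clipped_coef / output_scale;  // e.g. 6.f becomes 6.f / output_scale
}

The direct-conv path below does the same rescaling inside direct_conv_trans_weights<PRECISION(kInt8), PRECISION(kInt8)>.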
@@ -39,7 +39,8 @@ inline bool direct_conv_trans_weights(
 const std::vector<float>& w_scale,
 float in_scale,
 float out_scale,
-std::vector<float>& merge_scale) { // NOLINT
+std::vector<float>& merge_scale, // NOLINT
+float* relu_clipped_coef) {
 constexpr int cblock = 4;
 int oc = win->dims()[0];
 int ic = win->dims()[1];
@@ -64,7 +65,8 @@ inline bool direct_conv_trans_weights<PRECISION(kInt8), PRECISION(kFloat)>(
 const std::vector<float>& w_scale,
 float in_scale,
 float out_scale,
-std::vector<float>& merge_scale) { // NOLINT
+std::vector<float>& merge_scale, // NOLINT
+float* relu_clipped_coef) {
 int cblock = 4;
 if (stride == 2) {
 cblock = lite::arm::math::conv_3x3s2_direct_int8_c_num();
@@ -103,7 +105,8 @@ inline bool direct_conv_trans_weights<PRECISION(kInt8), PRECISION(kInt8)>(
 const std::vector<float>& w_scale,
 float in_scale,
 float out_scale,
-std::vector<float>& merge_scale) { // NOLINT
+std::vector<float>& merge_scale, // NOLINT
+float* relu_clipped_coef) {
 int cblock = 4;
 if (stride == 2) {
 cblock = lite::arm::math::conv_3x3s2_direct_int8_c_num();
@@ -130,6 +133,8 @@ inline bool direct_conv_trans_weights<PRECISION(kInt8), PRECISION(kInt8)>(
 merge_scale[i] = w_scale[i] * scale;
 }
 }
+/// update relu_clipped_coef
+*relu_clipped_coef /= out_scale;
 /// update bias
 if (bin) {
 bout->Resize(bin->dims());
@@ -167,16 +172,17 @@ class DirectConv : public KernelLite<TARGET(kARM), Ptype> {
 << "direct conv only support conv3x3s1 and conv3x3s2";
 CHECK(kw == 3 && kh == 3)
 << "direct conv only support conv3x3s1 and conv3x3s2";
-flag_trans_bias_ =
-direct_conv_trans_weights<Ptype, OutType>(param.filter,
-&weights_,
-param.bias,
-&bias_,
-sw,
-param.weight_scale,
-param.input_scale,
-param.output_scale,
-w_scale_);
+flag_trans_bias_ = direct_conv_trans_weights<Ptype, OutType>(
+param.filter,
+&weights_,
+param.bias,
+&bias_,
+sw,
+param.weight_scale,
+param.input_scale,
+param.output_scale,
+w_scale_,
+&param.activation_param.Relu_clipped_coef);
 }
 virtual void Run();
......
@@ -56,7 +56,7 @@ DEFINE_int32(dila_w, 1, "dilation width");
 DEFINE_bool(flag_act, true, "do act");
 DEFINE_bool(flag_bias, true, "with bias");
 DEFINE_double(clipped_coef, 1.0, "clipped relu coef");
-DEFINE_double(leakey_relu_alpha, 8.88, "leakey relu alpha");
+DEFINE_double(leakey_relu_alpha, 2.22, "leakey relu alpha");
 typedef paddle::lite::DDim DDim;
 typedef paddle::lite::Tensor Tensor;
@@ -188,7 +188,14 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
 }
 std::vector<float> scale_in{1.f / 127};
-std::vector<float> scale_out{weight_dim.count(1, 4) / 127.f};
+std::vector<float> scale_out(1, weight_dim.count(1, 4) / 127.f);
+if (flag_act == 2) {
+  scale_out[0] = six / 127.f;
+} else if (flag_act == 4) {
+  if (std::abs(alpha) > 1) {
+    scale_out[0] *= std::abs(alpha);
+  }
+}
 std::vector<float> scale_w(weight_dim[0], 1.f / 127);
 param_int8_out.input_scale = scale_in[0];
@@ -484,7 +491,7 @@ TEST(TestConv3x3DWInt8, test_conv3x3_depthwise) {
 for (auto& stride : {1, 2}) {
 for (auto& pad : {0, 1}) {
 for (auto& flag_bias : {false, true}) {
-for (auto& flag_act : {0, 1}) {
+for (auto& flag_act : {0, 1, 2, 4}) {
 for (auto& c : {1, 3, 5, 8, 16, 32}) {
 std::vector<DDim> dims;
 DDim weights_dim({c, 1, 3, 3});
@@ -520,7 +527,7 @@ TEST(TestConv5x5DWInt8, test_conv5x5_depthwise) {
 for (auto& stride : {1, 2}) {
 for (auto& pad : {0, 1, 2, 3, 4}) {
 for (auto& flag_bias : {false, true}) {
-for (auto& flag_act : {0, 1}) {
+for (auto& flag_act : {0, 1, 2, 4}) {
 for (auto& c : {1, 5, 15, 33}) {
 std::vector<DDim> dims;
 DDim weights_dim({c, 1, 5, 5});
@@ -553,7 +560,7 @@ TEST(TestConv5x5DWInt8, test_conv5x5_depthwise) {
 #if 1 /// conv1x1s1
 TEST(TestConv1x1s1Int8, test_conv1x1s1) {
 if (FLAGS_basic_test) {
-for (auto& cin : {1, 3, 8, 32}) {
+for (auto& cin : {1, 3, 8, 33}) {
 for (auto& cout : {1, 5, 17}) {
 for (auto& g : {1, 2}) {
 for (auto& flag_bias : {false, true}) {
@@ -599,7 +606,7 @@ TEST(TestConv3x3s1Int8, test_conv_3x3s1) {
 for (auto& pad_left : {1, 2}) {
 for (auto& pad_right : {1, 2}) {
 for (auto& flag_bias : {false, true}) {
-for (auto& flag_act : {0, 1}) {
+for (auto& flag_act : {0, 1, 2, 4}) {
 std::vector<DDim> dims;
 DDim weights_dim({cout, cin, 3, 3});
 for (auto& batch : {1, 2}) {
@@ -641,7 +648,7 @@ TEST(TestConv3x3s2Int8, test_conv_3x3s2) {
 for (auto& pad_left : {1, 2}) {
 for (auto& pad_right : {1, 2}) {
 for (auto& flag_bias : {false, true}) {
-for (auto& flag_act : {0, 1}) {
+for (auto& flag_act : {0, 1, 2, 4}) {
 std::vector<DDim> dims;
 DDim weights_dim({cout, cin, 3, 3});
 for (auto& batch : {1, 2}) {
@@ -673,7 +680,7 @@ TEST(TestConv3x3s2Int8, test_conv_3x3s2) {
 }
 #endif /// conv3x3s2
-#if 0 /// random param conv
+#if 1 /// random param conv
 TEST(TestConvRandInt8, test_conv_rand) {
 if (FLAGS_basic_test) {
 for (auto& cin : {1, 17}) {
......
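
In the test harness, flag_act 2 selects relu6 and 4 selects leaky relu, and the int8 output scale is adjusted so the quantized reference covers the post-activation range. A small sketch of that choice (function and parameter names are illustrative; `six` and `alpha` are the test's relu6 threshold and leaky-relu slope):

#include <cmath>

// base is the default output scale, weight_dim.count(1, 4) / 127.f in the test.
inline float pick_output_scale(int flag_act, float base, float six, float alpha) {
  if (flag_act == 2) return six / 127.f;  // relu6 output lies in [0, six]
  if (flag_act == 4 && std::abs(alpha) > 1.f) {
    return base * std::abs(alpha);        // leaky relu can widen the output range
  }
  return base;
}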