提交 9f343156 编写于 作者: xiebaiyuan

[OPENCL] use depthwise3x3d1 to replace s1, enhance hn performance, test=develop

上级 24d37695
......@@ -244,7 +244,7 @@ __kernel void depth_conv2d_3x3(
WRITE_IMG_TYPE(CL_DTYPE_CHAR, output_image, output_pos, output);
}
__kernel void depth_conv2d_3x3s1(__private const int ou_ch_blk,
__kernel void depth_conv2d_3x3d1(__private const int ou_ch_blk,
__private const int ou_w_blk,
__private const int ou_nh,
__read_only image2d_t input,
......
......@@ -149,9 +149,9 @@ void ConvImageCompute::PrepareForRun() {
} else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1] &&
kernel_h == 3 && kernel_w == 3 && groups > 1) {
// depth_conv2d_3x3s1, depth_conv2d_3x3
if (stride_h == 1 && dilations[0] == 1) {
kernel_func_names_.push_back("depth_conv2d_3x3s1");
impl_ = &ConvImageCompute::DepthwiseConv2d3x3s1;
if (dilations[0] == 1) {
kernel_func_names_.push_back("depth_conv2d_3x3d1");
impl_ = &ConvImageCompute::DepthwiseConv2d3x3d1;
{
// depthwise spl gws s1
int c_block = (output_dims[1] + 3) / 4;
......@@ -1462,7 +1462,7 @@ void ConvImageCompute::Conv2d7x7opt(bool is_turn) {
CLRuntime::Global()->command_queue().finish();
}
}
void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) {
void ConvImageCompute::DepthwiseConv2d3x3d1(bool is_turn) {
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
const auto& param = *param_.get_mutable<param_t>();
......
......@@ -63,7 +63,7 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
void Conv2d5x5opt(bool is_turn = false);
void Conv2d7x7(bool is_turn = false);
void Conv2d7x7opt(bool is_turn = false);
void DepthwiseConv2d3x3s1(bool is_turn = false);
void DepthwiseConv2d3x3d1(bool is_turn = false);
void DepthwiseConv2d3x3(bool is_turn = false);
void DepthwiseConv2d(bool is_turn = false);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册