提交 9f343156 编写于 作者: xiebaiyuan's avatar xiebaiyuan

[OPENCL] use depthwise3x3d1 to replace s1 and enhance hn performance, test=develop

上级 24d37695
...@@ -244,7 +244,7 @@ __kernel void depth_conv2d_3x3( ...@@ -244,7 +244,7 @@ __kernel void depth_conv2d_3x3(
WRITE_IMG_TYPE(CL_DTYPE_CHAR, output_image, output_pos, output); WRITE_IMG_TYPE(CL_DTYPE_CHAR, output_image, output_pos, output);
} }
__kernel void depth_conv2d_3x3s1(__private const int ou_ch_blk, __kernel void depth_conv2d_3x3d1(__private const int ou_ch_blk,
__private const int ou_w_blk, __private const int ou_w_blk,
__private const int ou_nh, __private const int ou_nh,
__read_only image2d_t input, __read_only image2d_t input,
......
...@@ -149,9 +149,9 @@ void ConvImageCompute::PrepareForRun() { ...@@ -149,9 +149,9 @@ void ConvImageCompute::PrepareForRun() {
} else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1] && } else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1] &&
kernel_h == 3 && kernel_w == 3 && groups > 1) { kernel_h == 3 && kernel_w == 3 && groups > 1) {
// depth_conv2d_3x3s1, depth_conv2d_3x3 // depth_conv2d_3x3s1, depth_conv2d_3x3
if (stride_h == 1 && dilations[0] == 1) { if (dilations[0] == 1) {
kernel_func_names_.push_back("depth_conv2d_3x3s1"); kernel_func_names_.push_back("depth_conv2d_3x3d1");
impl_ = &ConvImageCompute::DepthwiseConv2d3x3s1; impl_ = &ConvImageCompute::DepthwiseConv2d3x3d1;
{ {
// depthwise spl gws s1 // depthwise spl gws s1
int c_block = (output_dims[1] + 3) / 4; int c_block = (output_dims[1] + 3) / 4;
...@@ -1462,7 +1462,7 @@ void ConvImageCompute::Conv2d7x7opt(bool is_turn) { ...@@ -1462,7 +1462,7 @@ void ConvImageCompute::Conv2d7x7opt(bool is_turn) {
CLRuntime::Global()->command_queue().finish(); CLRuntime::Global()->command_queue().finish();
} }
} }
void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) { void ConvImageCompute::DepthwiseConv2d3x3d1(bool is_turn) {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
const auto& param = *param_.get_mutable<param_t>(); const auto& param = *param_.get_mutable<param_t>();
......
...@@ -63,7 +63,7 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -63,7 +63,7 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
void Conv2d5x5opt(bool is_turn = false); void Conv2d5x5opt(bool is_turn = false);
void Conv2d7x7(bool is_turn = false); void Conv2d7x7(bool is_turn = false);
void Conv2d7x7opt(bool is_turn = false); void Conv2d7x7opt(bool is_turn = false);
void DepthwiseConv2d3x3s1(bool is_turn = false); void DepthwiseConv2d3x3d1(bool is_turn = false);
void DepthwiseConv2d3x3(bool is_turn = false); void DepthwiseConv2d3x3(bool is_turn = false);
void DepthwiseConv2d(bool is_turn = false); void DepthwiseConv2d(bool is_turn = false);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册