From c4a0b84cad5e0d2ded9f0594baee5df0f2c5260e Mon Sep 17 00:00:00 2001 From: liutuo Date: Mon, 11 Jun 2018 16:18:31 +0800 Subject: [PATCH] fix build wino transform kernel error --- mace/kernels/opencl/cl/winograd_transform.cl | 310 ------------------- mace/kernels/opencl/winograd_transform.cc | 98 ++---- mace/ops/winograd_convolution_benchmark.cc | 11 - mace/ops/winograd_convolution_test.cc | 42 --- mace/ops/winograd_transform_benchmark.cc | 16 - 5 files changed, 25 insertions(+), 452 deletions(-) diff --git a/mace/kernels/opencl/cl/winograd_transform.cl b/mace/kernels/opencl/cl/winograd_transform.cl index 833b7191..bfc58981 100644 --- a/mace/kernels/opencl/cl/winograd_transform.cl +++ b/mace/kernels/opencl/cl/winograd_transform.cl @@ -235,316 +235,6 @@ __kernel void winograd_inverse_transform_2x2(KERNEL_ERROR_PARAMS } -__kernel void winograd_transform_6x6(KERNEL_ERROR_PARAMS - GLOBAL_WORK_GROUP_SIZE_DIM2 - __read_only image2d_t input, - __write_only image2d_t output, - __private const int in_height, - __private const int in_width, - __private const int in_channel, - __private const int round_hw, - __private const float round_hw_r, - __private const int round_w, - __private const float round_w_r, - __private const int padding_top, - __private const int padding_left) { - int out_width_idx_i = get_global_id(0); - int chan_blk_idx_i = get_global_id(1); - -#ifndef NON_UNIFORM_WORK_GROUP - if (out_width_idx_i >= global_size_dim0 || chan_blk_idx_i >= global_size_dim1) { - return; - } - const int chan_blk_size = global_size_dim1 >> 3; -#else - const int chan_blk_size = get_global_size(1) >> 3; -#endif - __local DATA_TYPE4 in[8][8]; - - int out_width_idx = out_width_idx_i >> 3; - int chan_blk_idx = chan_blk_idx_i >> 3; - int i = mad24(out_width_idx, -8, out_width_idx_i); - int j = mad24(chan_blk_idx, -8, chan_blk_idx_i); - const int batch_idx = out_width_idx / round_hw; - const int t_idx = mad24(batch_idx, -round_hw, out_width_idx); - const int n_round_w = t_idx / round_w; - const int mod_round_w = mad24(n_round_w, -round_w, t_idx); - const int height_idx = mad24(n_round_w, 6, -padding_top); - const int width_idx = mad24(mod_round_w, 6, -padding_left); - - const int nh_idx = mad24(batch_idx, in_height, height_idx); - const int wc_idx = mad24(chan_blk_idx, in_width, width_idx); - - int y = select(nh_idx + j, -1, height_idx + j < 0 || height_idx + j >= in_height); - int x = width_idx + i; - x = select(wc_idx + i, -1, x < 0 || x >= in_width); - in[j][i] = READ_IMAGET(input, SAMPLER, (int2)(x, y)); - barrier(CLK_LOCAL_MEM_FENCE); - - DATA_TYPE4 tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7; - DATA_TYPE4 tmp; - - if (j == 0) { - tmp = 0.5f * (in[1][i] + in[5][i]) - 2.5f * in[3][i]; - tt2 = 1.5f * in[5][i] + tmp; - tt4 = 1.5f * in[1][i] + tmp; - tt0 = in[1][i] - 4.25f * in[3][i] + in[5][i]; - tt1 = in[2][i] - 4.25f * in[4][i] + in[6][i]; - tmp = in[2][i] - 5 * in[4][i]; - tt3 = in[6][i] + 0.25f * tmp; - tt5 = in[6][i] + 3 * in[2][i] + tmp; - tt6 = 5.25f * (in[4][i] - in[2][i]) + in[0][i] - in[6][i]; - tt7 = 5.25f * (in[3][i] - in[5][i]) + in[7][i] - in[1][i]; - in[0][i] = tt6; - in[1][i] = tt1 + tt0; - in[2][i] = tt1 - tt0; - in[3][i] = tt3 + tt2; - in[4][i] = tt3 - tt2; - in[5][i] = tt5 + tt4; - in[6][i] = tt5 - tt4; - in[7][i] = tt7; - } - barrier(CLK_LOCAL_MEM_FENCE); - if(j == 0) { - tmp = 0.5f * (in[i][1] + in[i][5]) - 2.5f * in[i][3]; - tt2 = 1.5f * in[i][5] + tmp; - tt4 = 1.5f * in[i][1] + tmp; - tt0 = in[i][1] - 4.25f * in[i][3] + in[i][5]; - tt1 = in[i][2] - 4.25f * in[i][4] + in[i][6]; - tmp = in[i][2] - 5 * in[i][4]; - tt3 = in[i][6] + 0.25f * tmp; - tt5 = in[i][6] + 3 * in[i][2] + tmp; - tt6 = 5.25f * (in[i][4] - in[i][2]) + in[i][0] - in[i][6]; - tt7 = 5.25f * (in[i][3] - in[i][5]) + in[i][7] - in[i][1]; - in[i][0] = tt6; - in[i][1] = tt1 + tt0; - in[i][2] = tt1 - tt0; - in[i][3] = tt3 + tt2; - in[i][4] = tt3 - tt2; - in[i][5] = tt5 + tt4; - in[i][6] = tt5 - tt4; - in[i][7] = tt7; - } - barrier(CLK_LOCAL_MEM_FENCE); - - chan_blk_idx += mul24(mad24(j, 8, i), chan_blk_size); - WRITE_IMAGET(output, (int2)(out_width_idx, chan_blk_idx), in[j][i]); - -} - -__kernel void winograd_inverse_transform_6x6(KERNEL_ERROR_PARAMS - GLOBAL_WORK_GROUP_SIZE_DIM2 - __read_only image2d_t input, -#ifdef BIAS - __read_only image2d_t bias, /* cout%4 * cout/4 */ -#endif - __write_only image2d_t output, - __private const int out_height, - __private const int out_width, - __private const int round_hw, - __private const float round_hw_r, - __private const int round_w, - __private const float round_w_r, - __private const float relux_max_limit) { - const int width_idx = get_global_id(0); - const int height_idx = get_global_id(1); -#ifndef NON_UNIFORM_WORK_GROUP - if (width_idx >= global_size_dim0 || height_idx >= global_size_dim1) { - return; - } - const int out_channel = global_size_dim1; -#else - const int out_channel = get_global_size(1); -#endif - - DATA_TYPE4 in0[8], in1[8], in2[8], in3[8], in4[8], in5[8], in6[8], in7[8]; - - DATA_TYPE4 tv0[8], tv1[8], tv2[8], tv3[8], tv4[8], tv5[8]; - const int width = width_idx; - const int height = height_idx; - - const int batch = width / round_hw; - const int t = mad24(batch, -round_hw, width); - const int n_round_w = t / round_w; - const int mod_round_w = mad24(n_round_w, -round_w, t); - const int out_height_idx = mul24(n_round_w, 6); - const int out_width_idx = mul24(mod_round_w, 6); - const int out_chan_idx = height; - const int coord_x = mad24(out_chan_idx, out_width, out_width_idx); - const int coord_y = mad24(batch, out_height, out_height_idx); - int h = height_idx; -#pragma unroll - for (short i = 0; i < 8; ++i) { - in0[i] = READ_IMAGET(input, SAMPLER, (int2)(width_idx, h)); - h += out_channel; - } -#pragma unroll - for (short i = 0; i < 8; ++i) { - in1[i] = READ_IMAGET(input, SAMPLER, (int2)(width_idx, h)); - h += out_channel; - } -#pragma unroll - for (short i = 0; i < 8; ++i) { - in2[i] = READ_IMAGET(input, SAMPLER, (int2)(width_idx, h)); - h += out_channel; - } -#pragma unroll - for (short i = 0; i < 8; ++i) { - in3[i] = READ_IMAGET(input, SAMPLER, (int2)(width_idx, h)); - h += out_channel; - } -#pragma unroll - for (short i = 0; i < 8; ++i) { - in4[i] = READ_IMAGET(input, SAMPLER, (int2)(width_idx, h)); - h += out_channel; - } -#pragma unroll - for (short i = 0; i < 8; ++i) { - in5[i] = READ_IMAGET(input, SAMPLER, (int2)(width_idx, h)); - h += out_channel; - } -#pragma unroll - for (short i = 0; i < 8; ++i) { - in6[i] = READ_IMAGET(input, SAMPLER, (int2)(width_idx, h)); - h += out_channel; - } -#pragma unroll - for (short i = 0; i < 8; ++i) { - in7[i] = READ_IMAGET(input, SAMPLER, (int2)(width_idx, h)); - h += out_channel; - } - DATA_TYPE4 tt0, tt1, tt2, tt3, tt4, tt5, d0, d7; -#define PROCESS_IN(i) \ - d0 = in0[i];\ - d7 = in7[i];\ - tt0 = in1[i] + in2[i];\ - tt1 = in1[i] - in2[i];\ - tt2 = in3[i] + in4[i];\ - tt3 = in3[i] - in4[i];\ - tt3 = tt3 + tt3;\ - tt4 = in5[i] + in6[i];\ - tt4 = tt4 + tt4;\ - tt5 = in5[i] - in6[i];\ - tt0 = tt0 + tt2 + tt4;\ - tt1 = tt1 + tt3 + tt5;\ - tv0[i] = tt0 + tt4 * 15 + d0;\ - tv1[i] = tt1 + tt5 * 15;\ - tv2[i] = tt0 + 3 * (tt2 + tt4);\ - tv3[i] = tt1 + 3 * (tt3 + tt5);\ - tv4[i] = tt0 + tt2 * 15;\ - tv5[i] = tt1 + tt3 * 15 + d7;\ - - PROCESS_IN(0); - PROCESS_IN(1); - PROCESS_IN(2); - PROCESS_IN(3); - PROCESS_IN(4); - PROCESS_IN(5); - PROCESS_IN(6); - PROCESS_IN(7); -#undef PROCESS_IN - -#define PROCESS_SND(i) \ - d0 = tv##i[0];\ - d7 = tv##i[7];\ - tt0 = tv##i[1] + tv##i[2];\ - tt1 = tv##i[1] - tv##i[2];\ - tt2 = tv##i[3] + tv##i[4];\ - tt3 = tv##i[3] - tv##i[4];\ - tt3 = tt3 + tt3;\ - tt4 = tv##i[5] + tv##i[6];\ - tt4 = tt4 + tt4;\ - tt5 = tv##i[5] - tv##i[6];\ - tt0 = tt0 + tt2 + tt4;\ - tt1 = tt1 + tt3 + tt5;\ - in##i[0] = tt0 + tt4 * 15 + d0;\ - in##i[1] = tt1 + tt5 * 15;\ - in##i[2] = tt0 + (tt2 + tt4) * 3;\ - in##i[3] = tt1 + (tt3 + tt5) * 3;\ - in##i[4] = tt0 + tt2 * 15;\ - in##i[5] = tt1 + tt3 * 15 + d7; - - PROCESS_SND(0); - PROCESS_SND(1); - PROCESS_SND(2); - PROCESS_SND(3); - PROCESS_SND(4); - PROCESS_SND(5); -#undef PROCESS_SND - -#ifdef BIAS - const DATA_TYPE4 bias_value = READ_IMAGET(bias, SAMPLER, (int2)(out_chan_idx, 0)); -#pragma unroll - for (short i = 0; i < 6; ++i) { - in0[i] += bias_value; - } -#pragma unroll - for (short i = 0; i < 6; ++i) { - in1[i] += bias_value; - } -#pragma unroll - for (short i = 0; i < 6; ++i) { - in2[i] += bias_value; - } -#pragma unroll - for (short i = 0; i < 6; ++i) { - in3[i] += bias_value; - } -#pragma unroll - for (short i = 0; i < 6; ++i) { - in4[i] += bias_value; - } -#pragma unroll - for (short i = 0; i < 6; ++i) { - in5[i] += bias_value; - } -#endif - -#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) -#pragma unroll - for (short i = 0; i < 6; ++i) { - in0[i] = do_activation(in0[i], relux_max_limit); - in1[i] = do_activation(in1[i], relux_max_limit); - in2[i] = do_activation(in2[i], relux_max_limit); - in3[i] = do_activation(in3[i], relux_max_limit); - in4[i] = do_activation(in4[i], relux_max_limit); - in5[i] = do_activation(in5[i], relux_max_limit); - } -#endif - const int num = min(6, out_width - out_width_idx); - const int h_num = out_height - out_height_idx; - if(h_num < 1) return; -#pragma unroll - for (int i = 0; i < num; ++i) { - WRITE_IMAGET(output, (int2)(coord_x + i, coord_y), in0[i]); - } - if(h_num < 2) return; -#pragma unroll - for (int i = 0; i < num; ++i) { - WRITE_IMAGET(output, (int2)(coord_x + i, coord_y + 1), in1[i]); - } - if(h_num < 3) return; -#pragma unroll - for (int i = 0; i < num; ++i) { - WRITE_IMAGET(output, (int2)(coord_x + i, coord_y + 2), in2[i]); - } - if(h_num < 4) return; -#pragma unroll - for (int i = 0; i < num; ++i) { - WRITE_IMAGET(output, (int2)(coord_x + i, coord_y + 3), in3[i]); - } - if(h_num < 5) return; -#pragma unroll - for (int i = 0; i < num; ++i) { - WRITE_IMAGET(output, (int2)(coord_x + i, coord_y + 4), in4[i]); - } - if(h_num < 6) return; -#pragma unroll - for (int i = 0; i < num; ++i) { - WRITE_IMAGET(output, (int2)(coord_x + i, coord_y + 5), in5[i]); - } -} - __kernel void winograd_transform_4x4(KERNEL_ERROR_PARAMS GLOBAL_WORK_GROUP_SIZE_DIM2 __read_only image2d_t input, diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc index 60d1426b..8bfaee3e 100644 --- a/mace/kernels/opencl/winograd_transform.cc +++ b/mace/kernels/opencl/winograd_transform.cc @@ -29,21 +29,19 @@ MaceStatus WinogradTransformFunctor::operator()( if (kernel_.get() == nullptr) { std::string obfuscated_kernel_name; std::set built_options; - if (wino_blk_size_ == 6) { - obfuscated_kernel_name = - MACE_OBFUSCATE_SYMBOL("winograd_transform_6x6"); - built_options.emplace("-Dwinograd_transform_6x6=" - + obfuscated_kernel_name); - } else if (wino_blk_size_ == 4) { + if (wino_blk_size_ == 4) { obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL("winograd_transform_4x4"); built_options.emplace("-Dwinograd_transform_4x4=" + obfuscated_kernel_name); - } else { + } else if (wino_blk_size_ == 2) { obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL("winograd_transform_2x2"); built_options.emplace("-Dwinograd_transform_2x2=" + obfuscated_kernel_name); + } else { + MACE_CHECK(false, "mace only supports 4x4 and 2x2 gpu winograd."); + return MACE_SUCCESS; } built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(DataTypeToEnum::value)); @@ -90,16 +88,10 @@ MaceStatus WinogradTransformFunctor::operator()( const float round_w_r = 1.f / static_cast(round_w); const index_t blk_sqr = (wino_blk_size_ + 2) * (wino_blk_size_ + 2); - uint32_t gws[2]; - if (wino_blk_size_ == 6) { - gws[0] = static_cast(out_width) * (wino_blk_size_ + 2); - gws[1] = - static_cast(RoundUpDiv4(input_tensor->dim(3))) * - (wino_blk_size_ + 2); - } else { - gws[0] = static_cast(out_width); - gws[1] = static_cast(RoundUpDiv4(input_tensor->dim(3))); - } + const uint32_t gws[2] = { + static_cast(out_width), + static_cast(RoundUpDiv4(input_tensor->dim(3))) + }; if (!IsVecEqual(input_shape_, input_tensor->shape())) { output_shape = {blk_sqr, input_tensor->dim(3), out_width}; std::vector image_shape; @@ -130,57 +122,19 @@ MaceStatus WinogradTransformFunctor::operator()( input_shape_ = input_tensor->shape(); } - if (wino_blk_size_ == 6) { - const std::vector lws = - {static_cast(wino_blk_size_ + 2), - static_cast(wino_blk_size_ + 2), 0}; - cl::Event event; - cl_int error; - if (runtime->IsNonUniformWorkgroupsSupported()) { - error = runtime->command_queue().enqueueNDRangeKernel( - kernel_, cl::NullRange, cl::NDRange(gws[0], gws[1]), - cl::NDRange(lws[0], lws[1]), nullptr, &event); - } else { - std::vector roundup_gws(2, 0); - roundup_gws[0] = RoundUp(gws[0], lws[0]); - roundup_gws[1] = RoundUp(gws[1], lws[1]); - error = runtime->command_queue().enqueueNDRangeKernel( - kernel_, cl::NullRange, - cl::NDRange(roundup_gws[0], roundup_gws[1]), - cl::NDRange(lws[0], lws[1]), nullptr, &event); - } - - - if (runtime->IsOutOfRangeCheckEnabled()) { - kernel_error_->Map(nullptr); - char *kerror_code = kernel_error_->mutable_data(); - MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; - kernel_error_->UnMap(); - } - MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; - if (future != nullptr) { - future->wait_fn = [runtime, event](CallStats *stats) { - event.wait(); - if (stats != nullptr) { - runtime->GetCallStats(event, stats); - } - }; - } - } else { - const std::vector lws = {kwg_size_ / 8, 8, 0}; - std::string tuning_key = Concat("winograd_transform_kernel", - output_tensor->dim(0), - output_tensor->dim(1), - output_tensor->dim(2)); - TuningOrRun2DKernel(kernel_, tuning_key, gws, lws, future); + const std::vector lws = {kwg_size_ / 8, 8, 0}; + std::string tuning_key = Concat("winograd_transform_kernel", + output_tensor->dim(0), + output_tensor->dim(1), + output_tensor->dim(2)); + TuningOrRun2DKernel(kernel_, tuning_key, gws, lws, future); - if (runtime->IsOutOfRangeCheckEnabled()) { - kernel_error_->Map(nullptr); - char *kerror_code = kernel_error_->mutable_data(); - MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; - kernel_error_->UnMap(); - } + if (runtime->IsOutOfRangeCheckEnabled()) { + kernel_error_->Map(nullptr); + char *kerror_code = kernel_error_->mutable_data(); + MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; + kernel_error_->UnMap(); } return MACE_SUCCESS; @@ -197,21 +151,19 @@ MaceStatus WinogradInverseTransformFunctor::operator()( if (kernel_.get() == nullptr) { std::string obfuscated_kernel_name; std::set built_options; - if (wino_blk_size_ == 6) { - obfuscated_kernel_name = - MACE_OBFUSCATE_SYMBOL("winograd_inverse_transform_6x6"); - built_options.emplace("-Dwinograd_inverse_transform_6x6=" - + obfuscated_kernel_name); - } else if (wino_blk_size_ == 4) { + if (wino_blk_size_ == 4) { obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL("winograd_inverse_transform_4x4"); built_options.emplace("-Dwinograd_inverse_transform_4x4=" + obfuscated_kernel_name); - } else { + } else if (wino_blk_size_ == 2) { obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL("winograd_inverse_transform_2x2"); built_options.emplace("-Dwinograd_inverse_transform_2x2=" + obfuscated_kernel_name); + } else { + MACE_CHECK(false, "mace only supports 4x4 and 2x2 gpu winograd."); + return MACE_SUCCESS; } built_options.emplace("-DDATA_TYPE=" + diff --git a/mace/ops/winograd_convolution_benchmark.cc b/mace/ops/winograd_convolution_benchmark.cc index 5bb15603..615e6326 100644 --- a/mace/ops/winograd_convolution_benchmark.cc +++ b/mace/ops/winograd_convolution_benchmark.cc @@ -109,25 +109,14 @@ MACE_BM_WINOGRAD_CONV(1, 256, 256, 3, 16, 2); MACE_BM_WINOGRAD_CONV(1, 64, 64, 3, 16, 4); MACE_BM_WINOGRAD_CONV(1, 128, 128, 3, 16, 4); MACE_BM_WINOGRAD_CONV(1, 256, 256, 3, 16, 4); -MACE_BM_WINOGRAD_CONV(1, 64, 64, 3, 16, 6); -MACE_BM_WINOGRAD_CONV(1, 128, 128, 3, 16, 6); -MACE_BM_WINOGRAD_CONV(1, 256, 256, 3, 16, 6); - MACE_BM_WINOGRAD_CONV(1, 28, 28, 256, 256, 2); MACE_BM_WINOGRAD_CONV(1, 28, 28, 256, 256, 4); -MACE_BM_WINOGRAD_CONV(1, 28, 28, 256, 256, 6); - MACE_BM_WINOGRAD_CONV(1, 56, 56, 256, 256, 2); MACE_BM_WINOGRAD_CONV(1, 56, 56, 256, 256, 4); -MACE_BM_WINOGRAD_CONV(1, 56, 56, 256, 256, 6); - MACE_BM_WINOGRAD_CONV(1, 128, 128, 128, 256, 2); MACE_BM_WINOGRAD_CONV(1, 128, 128, 128, 256, 4); -MACE_BM_WINOGRAD_CONV(1, 128, 128, 128, 256, 6); - MACE_BM_WINOGRAD_CONV(1, 256, 256, 256, 256, 2); MACE_BM_WINOGRAD_CONV(1, 256, 256, 256, 256, 4); -MACE_BM_WINOGRAD_CONV(1, 256, 256, 256, 256, 6); } // namespace test } // namespace ops diff --git a/mace/ops/winograd_convolution_test.cc b/mace/ops/winograd_convolution_test.cc index 3e594500..c2bd6b12 100644 --- a/mace/ops/winograd_convolution_test.cc +++ b/mace/ops/winograd_convolution_test.cc @@ -141,27 +141,6 @@ TEST_F(WinogradConvlutionTest, BatchConvolutionM2) { Padding::SAME, 2); } -TEST_F(WinogradConvlutionTest, AlignedConvolutionM6) { - WinogradConvolution(1, 32, 32, 3, 3, - Padding::VALID, 6); - WinogradConvolution(1, 32, 32, 3, 3, - Padding::SAME, 6); -} - -TEST_F(WinogradConvlutionTest, UnAlignedConvolutionM6) { - WinogradConvolution(1, 61, 67, 31, 37, - Padding::VALID, 6); - WinogradConvolution(1, 61, 67, 37, 31, - Padding::SAME, 6); -} - -TEST_F(WinogradConvlutionTest, BatchConvolutionM6) { - WinogradConvolution(3, 64, 64, 32, 32, - Padding::VALID, 6); - WinogradConvolution(5, 61, 67, 37, 31, - Padding::SAME, 6); -} - TEST_F(WinogradConvlutionTest, AlignedConvolutionM4) { WinogradConvolution(1, 32, 32, 3, 3, Padding::VALID, 4); @@ -299,27 +278,6 @@ TEST_F(WinogradConvlutionTest, BatchConvolutionWithM2Pad) { 2, 2); } -TEST_F(WinogradConvlutionTest, AlignedConvolutionM6WithPad) { - WinogradConvolutionWithPad(1, 32, 32, 32, 16, - 1, 6); - WinogradConvolutionWithPad(1, 32, 32, 32, 16, - 2, 6); -} - -TEST_F(WinogradConvlutionTest, UnAlignedConvolutionM6WithPad) { - WinogradConvolutionWithPad(1, 61, 67, 31, 37, - 1, 6); - WinogradConvolutionWithPad(1, 61, 67, 37, 31, - 2, 6); -} - -TEST_F(WinogradConvlutionTest, BatchConvolutionWithM6Pad) { - WinogradConvolutionWithPad(3, 64, 64, 32, 32, - 1, 6); - WinogradConvolutionWithPad(5, 61, 67, 37, 31, - 2, 6); -} - TEST_F(WinogradConvlutionTest, AlignedConvolutionM4WithPad) { WinogradConvolutionWithPad(1, 32, 32, 32, 16, 1, 4); diff --git a/mace/ops/winograd_transform_benchmark.cc b/mace/ops/winograd_transform_benchmark.cc index 1f1bb2db..ecba8417 100644 --- a/mace/ops/winograd_transform_benchmark.cc +++ b/mace/ops/winograd_transform_benchmark.cc @@ -74,9 +74,6 @@ MACE_BM_WINO_TRANSFORM(1, 64, 64, 3, 2); MACE_BM_WINO_TRANSFORM(1, 128, 128, 3, 4); MACE_BM_WINO_TRANSFORM(1, 256, 256, 3, 4); MACE_BM_WINO_TRANSFORM(1, 64, 64, 3, 4); -MACE_BM_WINO_TRANSFORM(1, 128, 128, 3, 6); -MACE_BM_WINO_TRANSFORM(1, 256, 256, 3, 6); -MACE_BM_WINO_TRANSFORM(1, 64, 64, 3, 6); namespace { template @@ -142,11 +139,6 @@ MACE_BM_WINO_INVERSE_TRANSFORM(1, 126, 126, 16, 4); MACE_BM_WINO_INVERSE_TRANSFORM(1, 62, 62, 16, 4); MACE_BM_WINO_INVERSE_TRANSFORM(1, 254, 254, 16, 4); -MACE_BM_WINO_INVERSE_TRANSFORM(1, 126, 126, 16, 6); -MACE_BM_WINO_INVERSE_TRANSFORM(1, 62, 62, 16, 6); -MACE_BM_WINO_INVERSE_TRANSFORM(1, 254, 254, 16, 6); - - namespace { template void WinoFilterBufferToImage(int iters, @@ -199,18 +191,12 @@ void WinoFilterBufferToImage(int iters, MACE_BM_WINO_B2I(16, 3, 3, 3, 2); MACE_BM_WINO_B2I(16, 3, 3, 3, 4); -MACE_BM_WINO_B2I(16, 3, 3, 3, 6); - MACE_BM_WINO_B2I(32, 3, 3, 3, 2); MACE_BM_WINO_B2I(32, 3, 3, 3, 4); -MACE_BM_WINO_B2I(32, 3, 3, 3, 6); MACE_BM_WINO_B2I(128, 3, 3, 3, 2); MACE_BM_WINO_B2I(128, 3, 3, 3, 4); -MACE_BM_WINO_B2I(128, 3, 3, 3, 6); MACE_BM_WINO_B2I(256, 3, 3, 3, 2); MACE_BM_WINO_B2I(256, 3, 3, 3, 4); -MACE_BM_WINO_B2I(256, 3, 3, 3, 6); - namespace { template @@ -278,10 +264,8 @@ void WinoMatMulBenchmark( MACE_BM_WINO_MATMUL(16, 3, 128, 128, 2); MACE_BM_WINO_MATMUL(16, 3, 128, 128, 4); -MACE_BM_WINO_MATMUL(16, 3, 128, 128, 6); MACE_BM_WINO_MATMUL(32, 3, 256, 256, 2); MACE_BM_WINO_MATMUL(32, 3, 256, 256, 4); -MACE_BM_WINO_MATMUL(32, 3, 256, 256, 6); } // namespace test } // namespace ops -- GitLab