diff --git a/mace/kernels/opencl/cl/activation.cl b/mace/kernels/opencl/cl/activation.cl index d5d7dc4a12f634b50d22b2a7f11e4adb92cd05a4..25e79b3db2e36f72ec961b1a6f3206b416ce81dd 100644 --- a/mace/kernels/opencl/cl/activation.cl +++ b/mace/kernels/opencl/cl/activation.cl @@ -31,7 +31,5 @@ __kernel void activation(KERNEL_ERROR_PARAMS DATA_TYPE4 out = do_activation(in, relux_max_limit); #endif - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb, kernel_error); - WRITE_IMAGET(output, (int2)(pos, hb), out); } diff --git a/mace/kernels/opencl/cl/addn.cl b/mace/kernels/opencl/cl/addn.cl index 52c8ab6d2b6ef1415da7fe27d3a54014158bb931..1e8616f3825e0eebff7e09dae414af05e1d5fb32 100644 --- a/mace/kernels/opencl/cl/addn.cl +++ b/mace/kernels/opencl/cl/addn.cl @@ -32,7 +32,6 @@ __kernel void addn(KERNEL_ERROR_PARAMS out = out + in3; #endif - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, hb, kernel_error); WRITE_IMAGET(output, (int2)(w, hb), out); } diff --git a/mace/kernels/opencl/cl/batch_norm.cl b/mace/kernels/opencl/cl/batch_norm.cl index 8303b3ffd3f58a936a3b5287fb6f3b646d8032c2..064d8ecc4b88310a1817e701823a2b75c8db1036 100644 --- a/mace/kernels/opencl/cl/batch_norm.cl +++ b/mace/kernels/opencl/cl/batch_norm.cl @@ -49,7 +49,5 @@ __kernel void batch_norm(KERNEL_ERROR_PARAMS out = do_activation(out, relux_max_limit); #endif - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb, kernel_error); - WRITE_IMAGET(output, (int2)(pos, hb), out); } diff --git a/mace/kernels/opencl/cl/bias_add.cl b/mace/kernels/opencl/cl/bias_add.cl index a09de1fb1aad297fcaf723af8143ba7519e11891..2cd5fb5caa9dc3c39c82bb6ff2c0f1c16dc87a66 100644 --- a/mace/kernels/opencl/cl/bias_add.cl +++ b/mace/kernels/opencl/cl/bias_add.cl @@ -24,7 +24,5 @@ __kernel void bias_add(KERNEL_ERROR_PARAMS DATA_TYPE4 bias_value = READ_IMAGET(bias, SAMPLER, (int2)(ch_blk, 0)); DATA_TYPE4 out = in + bias_value; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb, kernel_error); - WRITE_IMAGET(output, (int2)(pos, hb), out); } diff --git a/mace/kernels/opencl/cl/buffer_to_image.cl b/mace/kernels/opencl/cl/buffer_to_image.cl index 32c6fc6ce792ebdd03dcff23a07ac4b1f6aed72a..38af3d5b6158995d67daf76db6bb3782b43032c6 100644 --- a/mace/kernels/opencl/cl/buffer_to_image.cl +++ b/mace/kernels/opencl/cl/buffer_to_image.cl @@ -50,7 +50,6 @@ __kernel void filter_buffer_to_image(KERNEL_ERROR_PARAMS } int2 coord = (int2)(w, h); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error); WRITE_IMAGET(output, coord, values); } @@ -158,7 +157,6 @@ __kernel void dw_filter_buffer_to_image(KERNEL_ERROR_PARAMS } int2 coord = (int2)(w, h); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error); WRITE_IMAGET(output, coord, values); } @@ -201,7 +199,6 @@ __kernel void in_out_buffer_to_image(KERNEL_ERROR_PARAMS values = vload4(0, input + offset); } int2 coord = (int2)(w, h); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error); WRITE_IMAGET(output, coord, values); } @@ -278,7 +275,6 @@ __kernel void arg_buffer_to_image(KERNEL_ERROR_PARAMS values = vload4(0, input + offset); } int2 coord = (int2)(w, h); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error); WRITE_IMAGET(output, coord, values); } @@ -356,7 +352,6 @@ __kernel void in_out_height_buffer_to_image(KERNEL_ERROR_PARAMS values.x = *(input + offset); } int2 coord = (int2)(w, h); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error); WRITE_IMAGET(output, coord, values); } @@ -438,7 +433,6 @@ __kernel void in_out_width_buffer_to_image(KERNEL_ERROR_PARAMS values.x = *(input + offset); } int2 coord = (int2)(w, h); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error); WRITE_IMAGET(output, coord, values); } @@ -513,28 +507,40 @@ __kernel void winograd_filter_buffer_to_image(KERNEL_ERROR_PARAMS int2 coord = (int2)(w, h); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, coord.x, coord.y + out_channels * 15, kernel_error); - -#pragma unroll - for (short i = 0; i < 4; ++i) { - WRITE_IMAGET(output, coord, tu0[i]); - coord.y += out_channels; - } -#pragma unroll - for (short i = 0; i < 4; ++i) { - WRITE_IMAGET(output, coord, tu1[i]); - coord.y += out_channels; - } -#pragma unroll - for (short i = 0; i < 4; ++i) { - WRITE_IMAGET(output, coord, tu2[i]); - coord.y += out_channels; - } -#pragma unroll - for (short i = 0; i < 4; ++i) { - WRITE_IMAGET(output, coord, tu3[i]); - coord.y += out_channels; - } + WRITE_IMAGET(output, coord, tu0[0]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu0[1]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu0[2]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu0[3]); + coord.y += out_channels; + + WRITE_IMAGET(output, coord, tu1[0]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu1[1]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu1[2]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu1[3]); + coord.y += out_channels; + + WRITE_IMAGET(output, coord, tu2[0]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu2[1]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu2[2]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu2[3]); + coord.y += out_channels; + + WRITE_IMAGET(output, coord, tu3[0]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu3[1]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu3[2]); + coord.y += out_channels; + WRITE_IMAGET(output, coord, tu3[3]); } // only support 3x3 now diff --git a/mace/kernels/opencl/cl/channel_shuffle.cl b/mace/kernels/opencl/cl/channel_shuffle.cl index 1674cc1db49db146adf7a258bb570c3bda5fba30..c404130eb0fd04d9f900b986405c1bd616bcbc8b 100644 --- a/mace/kernels/opencl/cl/channel_shuffle.cl +++ b/mace/kernels/opencl/cl/channel_shuffle.cl @@ -51,8 +51,6 @@ __kernel void channel_shuffle(KERNEL_ERROR_PARAMS int out_x = mad24(mad24(group_chan_blk_idx, groups, g_blk), width, width_idx); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x + groups_blks_width * 3, hb_idx, kernel_error); - WRITE_IMAGET(output, (int2)(out_x, hb_idx), out_chan_data0); out_x += groups_blks_width; diff --git a/mace/kernels/opencl/cl/common.h b/mace/kernels/opencl/cl/common.h index 905987b4f56e98d2f5160db079e33b65fd611b1f..51cb3830a157386ed335342bba1b3683d32310a9 100644 --- a/mace/kernels/opencl/cl/common.h +++ b/mace/kernels/opencl/cl/common.h @@ -14,9 +14,19 @@ #define CMD_TYPE(cmd, type) CMD_TYPE_STR(cmd, type) #define DATA_TYPE4 VEC_DATA_TYPE(DATA_TYPE, 4) -#define READ_IMAGET CMD_TYPE(read_image, CMD_DATA_TYPE) -#define WRITE_IMAGET CMD_TYPE(write_image, CMD_DATA_TYPE) +#ifdef OUT_OF_RANGE_CHECK +#define CHECK_OUT_OF_RANGE_FOR_IMAGE2D(image, coord) \ + check_out_of_range_for_image2d(image, (coord).x, (coord).y, kernel_error); +#else +#define CHECK_OUT_OF_RANGE_FOR_IMAGE2D(image, coord) +#endif + +#define READ_IMAGET(image, coord, value) \ + CMD_TYPE(read_image, CMD_DATA_TYPE)(image, coord, value) +#define WRITE_IMAGET(image, coord, value) \ + CHECK_OUT_OF_RANGE_FOR_IMAGE2D(image, coord) \ + CMD_TYPE(write_image, CMD_DATA_TYPE)(image, coord, value); #ifndef NON_UNIFORM_WORK_GROUP @@ -47,13 +57,6 @@ #endif -#ifdef OUT_OF_RANGE_CHECK -#define CHECK_OUT_OF_RANGE_FOR_IMAGE2D(image, x, y, kernel_error) \ - check_out_of_range_for_image2d(image, x, y, kernel_error) -#else -#define CHECK_OUT_OF_RANGE_FOR_IMAGE2D(image, x, y, kernel_error) -#endif - __constant sampler_t SAMPLER = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; diff --git a/mace/kernels/opencl/cl/concat.cl b/mace/kernels/opencl/cl/concat.cl index 76ab1192477f9b46451ddb35dbc5cdd99d6dab29..e6e78e8651286118f8c7f7772eebbfe6c138cfa2 100644 --- a/mace/kernels/opencl/cl/concat.cl +++ b/mace/kernels/opencl/cl/concat.cl @@ -82,8 +82,6 @@ __kernel void concat_channel(KERNEL_ERROR_PARAMS const int pos = mad24(chan_blk_idx, width, width_idx); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb_idx, kernel_error); - WRITE_IMAGET(output, (int2)(pos, hb_idx), data); } @@ -114,8 +112,6 @@ __kernel void concat_channel_multi(KERNEL_ERROR_PARAMS const int pos = mad24(chan_blk_idx + chan_blk_offset, width, width_idx); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb_idx, kernel_error); - WRITE_IMAGET(output, (int2)(pos, hb_idx), data); } diff --git a/mace/kernels/opencl/cl/conv_2d.cl b/mace/kernels/opencl/cl/conv_2d.cl index 5b27b1e9337cefd5093c0026c13172eabff58bf8..5289e513301d41d2684be090597636f263b2b601 100644 --- a/mace/kernels/opencl/cl/conv_2d.cl +++ b/mace/kernels/opencl/cl/conv_2d.cl @@ -129,22 +129,18 @@ __kernel void conv_2d(KERNEL_ERROR_PARAMS const int out_x_base = mul24(out_ch_blk, out_width); int w = out_w_blk; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0); w += out_w_blks; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1); w += out_w_blks; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2); w += out_w_blks; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3); } diff --git a/mace/kernels/opencl/cl/conv_2d_1x1.cl b/mace/kernels/opencl/cl/conv_2d_1x1.cl index 48274184f3619fa83ae37bb828729b05a8fd2b20..65d2b9c68e8680ac62763df59daee3915e8b3f25 100644 --- a/mace/kernels/opencl/cl/conv_2d_1x1.cl +++ b/mace/kernels/opencl/cl/conv_2d_1x1.cl @@ -105,21 +105,17 @@ __kernel void conv_2d_1x1(KERNEL_ERROR_PARAMS const int out_x_base = mul24(out_ch_blk, width); int out_x_idx = out_w_blk; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + out_x_idx, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out0); out_x_idx += out_w_blks; if (out_x_idx >= width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + out_x_idx, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out1); out_x_idx += out_w_blks; if (out_x_idx >= width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + out_x_idx, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out2); out_x_idx += out_w_blks; if (out_x_idx >= width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + out_x_idx, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out3); } diff --git a/mace/kernels/opencl/cl/conv_2d_3x3.cl b/mace/kernels/opencl/cl/conv_2d_3x3.cl index f034bd57d626a0399bd35e5980334cf86f824342..f7c1149f8fd2bbeddc0ba9efccd9f35cd019196c 100644 --- a/mace/kernels/opencl/cl/conv_2d_3x3.cl +++ b/mace/kernels/opencl/cl/conv_2d_3x3.cl @@ -136,37 +136,31 @@ __kernel void conv_2d_3x3(KERNEL_ERROR_PARAMS const int out_x_base = mul24(out_ch_blk, out_width); int w = out_w_blk; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0); w += out_w_blks; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1); w += out_w_blks; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2); w += out_w_blks; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3); w += out_w_blks; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out4); - } diff --git a/mace/kernels/opencl/cl/cwise.cl b/mace/kernels/opencl/cl/cwise.cl index f02dadf45fa3c63bf0d39348108a2c4775b1c6b1..e93dfc7cbde706d4d3cb4ead20a6efdb3ea0c5ea 100644 --- a/mace/kernels/opencl/cl/cwise.cl +++ b/mace/kernels/opencl/cl/cwise.cl @@ -44,7 +44,5 @@ __kernel void cwise(KERNEL_ERROR_PARAMS out.w = fabs(in0.w); #endif - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, hb, kernel_error); - WRITE_IMAGET(output, (int2)(w, hb), out); } diff --git a/mace/kernels/opencl/cl/depth_to_space.cl b/mace/kernels/opencl/cl/depth_to_space.cl index 6b2cdcc64737582ddf52afe1e9c233f9e9e7c02e..3fd66f5a35fcec5b1563ecc283552a9e398fc754 100644 --- a/mace/kernels/opencl/cl/depth_to_space.cl +++ b/mace/kernels/opencl/cl/depth_to_space.cl @@ -37,8 +37,6 @@ __kernel void depth_to_space(KERNEL_ERROR_PARAMS const int in_pos = mad24(in_d, input_width, in_w); DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, in_hb)); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_pos, out_hb, kernel_error); - WRITE_IMAGET(output, (int2)(out_pos, out_hb), in_data); } @@ -79,7 +77,5 @@ __kernel void space_to_depth(KERNEL_ERROR_PARAMS const int out_pos = mad24(out_d, output_width, out_w); DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, hb)); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_pos, out_hb, kernel_error); - WRITE_IMAGET(output, (int2)(out_pos, out_hb), in_data); } diff --git a/mace/kernels/opencl/cl/depthwise_conv2d.cl b/mace/kernels/opencl/cl/depthwise_conv2d.cl index bfdc790098e328f2b62999d5d8e2c85fbe631e68..9173f1b4d2c47c86db4bf4610ba3ab5c5085df9f 100644 --- a/mace/kernels/opencl/cl/depthwise_conv2d.cl +++ b/mace/kernels/opencl/cl/depthwise_conv2d.cl @@ -123,22 +123,18 @@ __kernel void depthwise_conv2d(KERNEL_ERROR_PARAMS const short out_x_base = mul24(out_ch_blk, out_width); short w = out_w_blk; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0); w += out_w_blks; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1); w += out_w_blks; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2); w += out_w_blks; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3); } @@ -253,21 +249,17 @@ __kernel void depthwise_conv2d_s1(KERNEL_ERROR_PARAMS const short out_x_base = mul24(out_ch_blk, out_width); short w = out_w_blk; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0); w += 1; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1); w += 1; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2); w += 1; if (w >= out_width) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3); } diff --git a/mace/kernels/opencl/cl/eltwise.cl b/mace/kernels/opencl/cl/eltwise.cl index 923131e87947c9200b4d83389bbeddefc6d95a9b..58838a7d29aad87345706cb66ecea0d86d4c22a4 100644 --- a/mace/kernels/opencl/cl/eltwise.cl +++ b/mace/kernels/opencl/cl/eltwise.cl @@ -37,7 +37,5 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS out = in0 - in1; #endif - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, hb, kernel_error); - WRITE_IMAGET(output, (int2)(w, hb), out); } diff --git a/mace/kernels/opencl/cl/fully_connected.cl b/mace/kernels/opencl/cl/fully_connected.cl index cbacd43949003725d98b6df6c597e74b1c331d14..e5de2c641e10fc51415004b7a46d075fdb62ddb3 100644 --- a/mace/kernels/opencl/cl/fully_connected.cl +++ b/mace/kernels/opencl/cl/fully_connected.cl @@ -60,8 +60,6 @@ __kernel void fully_connected(KERNEL_ERROR_PARAMS result = do_activation(result, relux_max_limit); #endif - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_blk_idx, batch_idx, kernel_error); - WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result); } @@ -153,8 +151,6 @@ __kernel void fully_connected_width(KERNEL_ERROR_PARAMS result = do_activation(result, relux_max_limit); #endif - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_blk_idx, batch_idx, kernel_error); - WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result); } } diff --git a/mace/kernels/opencl/cl/matmul.cl b/mace/kernels/opencl/cl/matmul.cl index 099aac0a24f7b9200967c3d0164774d39121553c..0509159cc2022325fd8af40fa914c3a9423636f2 100644 --- a/mace/kernels/opencl/cl/matmul.cl +++ b/mace/kernels/opencl/cl/matmul.cl @@ -48,18 +48,14 @@ __kernel void matmul(KERNEL_ERROR_PARAMS c3 += (DATA_TYPE4)(dot(a0, b3), dot(a1, b3), dot(a2, b3), dot(a3, b3)); } - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(C, gx, gy, kernel_error); WRITE_IMAGET(C, (int2)(gx, gy), c0); if ((gx + 1) >= N) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(C, gx + 1, gy, kernel_error); WRITE_IMAGET(C, (int2)(gx + 1, gy), c1); if ((gx + 2) >= N) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(C, gx + 2, gy, kernel_error); WRITE_IMAGET(C, (int2)(gx + 2, gy), c2); if ((gx + 3) >= N) return; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(C, gx + 3, gy, kernel_error); WRITE_IMAGET(C, (int2)(gx + 3, gy), c3); } diff --git a/mace/kernels/opencl/cl/pooling.cl b/mace/kernels/opencl/cl/pooling.cl index 4d5beef642361a987b0d1e3b71bfe855980bc1d7..c76d055ff21dc0bd8dbe689297427db6c66c9ac3 100644 --- a/mace/kernels/opencl/cl/pooling.cl +++ b/mace/kernels/opencl/cl/pooling.cl @@ -96,6 +96,5 @@ __kernel void pooling(KERNEL_ERROR_PARAMS #endif const int pos = mad24(out_chan_idx, out_width, out_width_idx); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, out_hb_idx, kernel_error); WRITE_IMAGET(output, (int2)(pos, out_hb_idx), res); } diff --git a/mace/kernels/opencl/cl/resize_bilinear.cl b/mace/kernels/opencl/cl/resize_bilinear.cl index 4e30e630902f555410f208181a3976acf62fd793..8736bf52717da115eaf9f32afe8a908b3754cee8 100644 --- a/mace/kernels/opencl/cl/resize_bilinear.cl +++ b/mace/kernels/opencl/cl/resize_bilinear.cl @@ -58,8 +58,6 @@ __kernel void resize_bilinear_nocache(KERNEL_ERROR_PARAMS const int out_w_offset = mul24(ch_blk, out_width); const int out_h_offset = mul24(b, out_height); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_w_offset + w, out_h_offset + h, kernel_error); - WRITE_IMAGET(output, (int2)(out_w_offset + w, out_h_offset + h), out); } diff --git a/mace/kernels/opencl/cl/slice.cl b/mace/kernels/opencl/cl/slice.cl index 723f5ba1b50703d29f11585bf12e3eb692660282..366cddc32f77e8e7d7bfcbe35abf3880d560274c 100644 --- a/mace/kernels/opencl/cl/slice.cl +++ b/mace/kernels/opencl/cl/slice.cl @@ -24,6 +24,5 @@ __kernel void slice(KERNEL_ERROR_PARAMS width, width_idx), hb_idx)); const int pos = mad24(chan_blk_idx, width, width_idx); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb_idx, kernel_error); WRITE_IMAGET(output, (int2)(pos, hb_idx), data); } diff --git a/mace/kernels/opencl/cl/softmax.cl b/mace/kernels/opencl/cl/softmax.cl index 1bc1dc7e5d5967d99b15dc03cd721f09c40b9333..710433a219146af66e63447fbbab7470a9ea9e2f 100644 --- a/mace/kernels/opencl/cl/softmax.cl +++ b/mace/kernels/opencl/cl/softmax.cl @@ -85,7 +85,5 @@ __kernel void softmax(KERNEL_ERROR_PARAMS data = native_exp(data) / sum; } - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb_idx, kernel_error); - WRITE_IMAGET(output, (int2)(pos, hb_idx), data); } diff --git a/mace/kernels/opencl/cl/space_to_batch.cl b/mace/kernels/opencl/cl/space_to_batch.cl index a8031a04ad58ff147e5b9927f007914b055e0691..eb05319896a5e034fcc9df1f189a95215c2ea77d 100644 --- a/mace/kernels/opencl/cl/space_to_batch.cl +++ b/mace/kernels/opencl/cl/space_to_batch.cl @@ -46,8 +46,6 @@ __kernel void space_to_batch(KERNEL_ERROR_PARAMS int2 batch_coord = (int2)(mul24(chan_idx, batch_width) + batch_w_idx, batch_hb_idx); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(batch_data, batch_coord.x, batch_coord.y, kernel_error); - WRITE_IMAGET(batch_data, batch_coord, value); } @@ -93,8 +91,6 @@ __kernel void batch_to_space(KERNEL_ERROR_PARAMS int2 space_coord = (int2)(mul24(chan_idx, space_width) + space_w_idx, space_b_idx * space_height + space_h_idx); - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(space_data, space_coord.x, space_coord.y, kernel_error); - WRITE_IMAGET(space_data, space_coord, value); } } diff --git a/mace/kernels/opencl/cl/winograd_transform.cl b/mace/kernels/opencl/cl/winograd_transform.cl index 329454afe64f1476a097923a5498042b412d110d..4201fd63b7797723a83e88e009dd7491e654cdb1 100644 --- a/mace/kernels/opencl/cl/winograd_transform.cl +++ b/mace/kernels/opencl/cl/winograd_transform.cl @@ -94,8 +94,6 @@ __kernel void winograd_transform_2x2(KERNEL_ERROR_PARAMS input3[2] = tv3[2] - tv3[1]; input3[3] = tv3[1] - tv3[3]; - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_width_idx, chan_blk_idx + chan_blk_idx * 15, kernel_error); - #pragma unroll for (short i = 0; i < 4; ++i) { WRITE_IMAGET(output, (int2)(out_width_idx, chan_blk_idx), input0[i]); @@ -212,23 +210,18 @@ __kernel void winograd_inverse_transform_2x2(KERNEL_ERROR_PARAMS in1[1] = do_activation(in1[1], relux_max_limit); #endif - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, coord_x, coord_y, kernel_error); - WRITE_IMAGET(output, (int2)(coord_x, coord_y), in0[0]); t = 0; if (out_width_idx + 1 < out_width) { - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, coord_x + 1, coord_y, kernel_error); WRITE_IMAGET(output, (int2)(coord_x + 1, coord_y), in0[1]); t += 1; } if (out_height_idx + 1 < out_height) { - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, coord_x, coord_y + 1, kernel_error); WRITE_IMAGET(output, (int2)(coord_x, coord_y + 1), in1[0]); t += 1; } if (t == 2) { - CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, coord_x + 1, coord_y + 1, kernel_error); WRITE_IMAGET(output, (int2)(coord_x + 1, coord_y + 1), in1[1]); }