diff --git a/mace/kernels/opencl/cl/activation.cl b/mace/kernels/opencl/cl/activation.cl index a3aa0be70d27105253cd112a136ad2fd12da15c8..4ddb8fd03b6a897c42dd1941145a000f1c54c1b4 100644 --- a/mace/kernels/opencl/cl/activation.cl +++ b/mace/kernels/opencl/cl/activation.cl @@ -31,8 +31,7 @@ __kernel void activation(KERNEL_ERROR_PARAMS DATA_TYPE4 out = do_activation(in, relux_max_limit); #endif -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, pos, hb, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(pos, hb), out); } diff --git a/mace/kernels/opencl/cl/addn.cl b/mace/kernels/opencl/cl/addn.cl index 7d2d43233d14486cf4c8f3b154526e02f7e7467a..444b57e591a6189b305ef24d7c6eb8372fcc3a39 100644 --- a/mace/kernels/opencl/cl/addn.cl +++ b/mace/kernels/opencl/cl/addn.cl @@ -32,9 +32,7 @@ __kernel void addn(KERNEL_ERROR_PARAMS out = out + in3; #endif -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, w, hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(w, hb), out); } diff --git a/mace/kernels/opencl/cl/batch_norm.cl b/mace/kernels/opencl/cl/batch_norm.cl index f3ba9accecf093af396a157cb528c078f5f956a2..7e46889e57f484e8b9f167bf8a4b5d16d32beddc 100644 --- a/mace/kernels/opencl/cl/batch_norm.cl +++ b/mace/kernels/opencl/cl/batch_norm.cl @@ -49,8 +49,7 @@ __kernel void batch_norm(KERNEL_ERROR_PARAMS out = do_activation(out, relux_max_limit); #endif -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, pos, hb, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(pos, hb), out); } diff --git a/mace/kernels/opencl/cl/bias_add.cl b/mace/kernels/opencl/cl/bias_add.cl index 77cf7e7e5dcfcf5239c8d57e8f57f768e637837c..b86738d68e346feb7d06d8c365180c72c287abd6 100644 --- a/mace/kernels/opencl/cl/bias_add.cl +++ b/mace/kernels/opencl/cl/bias_add.cl @@ -24,8 +24,7 @@ __kernel void bias_add(KERNEL_ERROR_PARAMS DATA_TYPE4 bias_value = READ_IMAGET(bias, SAMPLER, (int2)(ch_blk, 0)); DATA_TYPE4 out = in + bias_value; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, pos, hb, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(pos, hb), out); } diff --git a/mace/kernels/opencl/cl/buffer_to_image.cl b/mace/kernels/opencl/cl/buffer_to_image.cl index 4a4b068ed7857ace604f854b3cfbb11b96a6ac05..e311c467ce10ed9ec65199f0ff3ad5201fdd4791 100644 --- a/mace/kernels/opencl/cl/buffer_to_image.cl +++ b/mace/kernels/opencl/cl/buffer_to_image.cl @@ -50,9 +50,7 @@ __kernel void filter_buffer_to_image(KERNEL_ERROR_PARAMS } int2 coord = (int2)(w, h); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, w, h, kernel_error); -#endif WRITE_IMAGET(output, coord, values); } @@ -160,9 +158,7 @@ __kernel void dw_filter_buffer_to_image(KERNEL_ERROR_PARAMS } int2 coord = (int2)(w, h); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, w, h, kernel_error); -#endif WRITE_IMAGET(output, coord, values); } @@ -205,9 +201,7 @@ __kernel void in_out_buffer_to_image(KERNEL_ERROR_PARAMS values = vload4(0, input + offset); } int2 coord = (int2)(w, h); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, w, h, kernel_error); -#endif WRITE_IMAGET(output, coord, values); } @@ -284,9 +278,7 @@ __kernel void arg_buffer_to_image(KERNEL_ERROR_PARAMS values = vload4(0, input + offset); } int2 coord = (int2)(w, h); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, w, h, kernel_error); -#endif WRITE_IMAGET(output, coord, values); } @@ -364,9 +356,7 @@ __kernel void in_out_height_buffer_to_image(KERNEL_ERROR_PARAMS values.x = *(input + offset); } int2 coord = (int2)(w, h); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, w, h, kernel_error); -#endif WRITE_IMAGET(output, coord, values); } @@ -448,9 +438,7 @@ __kernel void in_out_width_buffer_to_image(KERNEL_ERROR_PARAMS values.x = *(input + offset); } int2 coord = (int2)(w, h); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, w, h, kernel_error); -#endif WRITE_IMAGET(output, coord, values); } @@ -525,9 +513,7 @@ __kernel void winograd_filter_buffer_to_image(KERNEL_ERROR_PARAMS int2 coord = (int2)(w, h); -#ifdef OUT_OF_RANGE_CHECK - check_out_of_range_for_image2d(output, coord.x, coord.y + out_channels * 15, kernel_error); -#endif + check_out_of_range_for_image2d(output, coord.x, coord.y + out_channels * 15, kernel_error); #pragma unroll for (short i = 0; i < 4; ++i) { diff --git a/mace/kernels/opencl/cl/channel_shuffle.cl b/mace/kernels/opencl/cl/channel_shuffle.cl index a73ab8c54c2505cb7e01ee661f394d8d1bc61728..5a1fb5cde38911918a7e68fa1ae2250990b7f010 100644 --- a/mace/kernels/opencl/cl/channel_shuffle.cl +++ b/mace/kernels/opencl/cl/channel_shuffle.cl @@ -51,9 +51,7 @@ __kernel void channel_shuffle(KERNEL_ERROR_PARAMS int out_x = mad24(mad24(group_chan_blk_idx, groups, g_blk), width, width_idx); -#ifdef OUT_OF_RANGE_CHECK - check_out_of_range_for_image2d(output, out_x + groups_blks_width * 3, hb_idx, kernel_error); -#endif + check_out_of_range_for_image2d(output, out_x + groups_blks_width * 3, hb_idx, kernel_error); WRITE_IMAGET(output, (int2)(out_x, hb_idx), out_chan_data0); out_x += groups_blks_width; diff --git a/mace/kernels/opencl/cl/common.h b/mace/kernels/opencl/cl/common.h index b3054c940010cc32c1a8e87a58dceae4fab7e098..b038bb7d17a12b5531a5444990d6d88e16e3f8e2 100644 --- a/mace/kernels/opencl/cl/common.h +++ b/mace/kernels/opencl/cl/common.h @@ -78,10 +78,12 @@ inline void check_out_of_range_for_image2d(__write_only image2d_t image, __private const int x, __private const int y, global char *kernel_error) { +#ifdef OUT_OF_RANGE_CHECK int2 image_dim = get_image_dim(image); if (x >= image_dim.x || y >= image_dim.y) { *kernel_error = '1'; } +#endif } #endif // MACE_KERNELS_OPENCL_CL_COMMON_H_ diff --git a/mace/kernels/opencl/cl/concat.cl b/mace/kernels/opencl/cl/concat.cl index b1d5fb52c978269ec4066390a7f6c6dfae25a04e..31b7eb47f06829e065815c445c4efa7e36143931 100644 --- a/mace/kernels/opencl/cl/concat.cl +++ b/mace/kernels/opencl/cl/concat.cl @@ -82,9 +82,8 @@ __kernel void concat_channel(KERNEL_ERROR_PARAMS const int pos = mad24(chan_blk_idx, width, width_idx); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, pos, hb_idx, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(pos, hb_idx), data); } @@ -115,9 +114,8 @@ __kernel void concat_channel_multi(KERNEL_ERROR_PARAMS const int pos = mad24(chan_blk_idx + chan_blk_offset, width, width_idx); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, pos, hb_idx, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(pos, hb_idx), data); } diff --git a/mace/kernels/opencl/cl/conv_2d.cl b/mace/kernels/opencl/cl/conv_2d.cl index 03a2b47e56b5752f71ef2bbbf8e68682e5033445..2bf65bc4c0001a83f88cccaab635d290c202c2c2 100644 --- a/mace/kernels/opencl/cl/conv_2d.cl +++ b/mace/kernels/opencl/cl/conv_2d.cl @@ -129,30 +129,22 @@ __kernel void conv_2d(KERNEL_ERROR_PARAMS const int out_x_base = mul24(out_ch_blk, out_width); int w = out_w_blk; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0); w += out_w_blks; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1); w += out_w_blks; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2); w += out_w_blks; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3); } diff --git a/mace/kernels/opencl/cl/conv_2d_1x1.cl b/mace/kernels/opencl/cl/conv_2d_1x1.cl index 0a748925ea01215d71fbac7e3d23a30d2614250f..4ee8e0a5e08240c48836dac532bed017b141500d 100644 --- a/mace/kernels/opencl/cl/conv_2d_1x1.cl +++ b/mace/kernels/opencl/cl/conv_2d_1x1.cl @@ -105,29 +105,21 @@ __kernel void conv_2d_1x1(KERNEL_ERROR_PARAMS const int out_x_base = mul24(out_ch_blk, width); int out_x_idx = out_w_blk; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + out_x_idx, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out0); out_x_idx += out_w_blks; if (out_x_idx >= width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + out_x_idx, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out1); out_x_idx += out_w_blks; if (out_x_idx >= width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + out_x_idx, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out2); out_x_idx += out_w_blks; if (out_x_idx >= width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + out_x_idx, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out3); } diff --git a/mace/kernels/opencl/cl/conv_2d_3x3.cl b/mace/kernels/opencl/cl/conv_2d_3x3.cl index a6d07b8b6a54e0542471ca4e7de23f6d0eb196d6..6fbd0d2ea9834dc407284c3e0c128d31ded7eace 100644 --- a/mace/kernels/opencl/cl/conv_2d_3x3.cl +++ b/mace/kernels/opencl/cl/conv_2d_3x3.cl @@ -136,45 +136,35 @@ __kernel void conv_2d_3x3(KERNEL_ERROR_PARAMS const int out_x_base = mul24(out_ch_blk, out_width); int w = out_w_blk; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0); w += out_w_blks; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1); w += out_w_blks; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2); w += out_w_blks; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3); w += out_w_blks; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out4); diff --git a/mace/kernels/opencl/cl/cwise.cl b/mace/kernels/opencl/cl/cwise.cl index f5d96647d2a0db5efd42b2ae108bc6a913995d92..ebaca71643f9a41a12f793d85e3cb4bfb5c4b359 100644 --- a/mace/kernels/opencl/cl/cwise.cl +++ b/mace/kernels/opencl/cl/cwise.cl @@ -44,8 +44,7 @@ __kernel void cwise(KERNEL_ERROR_PARAMS out.w = fabs(in0.w); #endif -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, w, hb, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(w, hb), out); } diff --git a/mace/kernels/opencl/cl/depth_to_space.cl b/mace/kernels/opencl/cl/depth_to_space.cl index f6e82b134d1c4e884051c7e6df8b0e816fcc0872..2cbb6c7aaaa613edf128dc2f3e318d116ba84972 100644 --- a/mace/kernels/opencl/cl/depth_to_space.cl +++ b/mace/kernels/opencl/cl/depth_to_space.cl @@ -37,9 +37,8 @@ __kernel void depth_to_space(KERNEL_ERROR_PARAMS const int in_pos = mad24(in_d, input_width, in_w); DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, in_hb)); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_pos, out_hb, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(out_pos, out_hb), in_data); } @@ -80,8 +79,7 @@ __kernel void space_to_depth(KERNEL_ERROR_PARAMS const int out_pos = mad24(out_d, output_width, out_w); DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, hb)); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_pos, out_hb, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(out_pos, out_hb), in_data); } diff --git a/mace/kernels/opencl/cl/depthwise_conv2d.cl b/mace/kernels/opencl/cl/depthwise_conv2d.cl index 932e37da204645a621067bb302cb06d4fd591175..7015bb711dbfb216fb4b4e7f80ce24ee68ab38b7 100644 --- a/mace/kernels/opencl/cl/depthwise_conv2d.cl +++ b/mace/kernels/opencl/cl/depthwise_conv2d.cl @@ -123,30 +123,22 @@ __kernel void depthwise_conv2d(KERNEL_ERROR_PARAMS const short out_x_base = mul24(out_ch_blk, out_width); short w = out_w_blk; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0); w += out_w_blks; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1); w += out_w_blks; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2); w += out_w_blks; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3); } @@ -261,29 +253,21 @@ __kernel void depthwise_conv2d_s1(KERNEL_ERROR_PARAMS const short out_x_base = mul24(out_ch_blk, out_width); short w = out_w_blk; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0); w += 1; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1); w += 1; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2); w += 1; if (w >= out_width) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); -#endif WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3); } diff --git a/mace/kernels/opencl/cl/eltwise.cl b/mace/kernels/opencl/cl/eltwise.cl index 81ba7d8718f42387280ff83be069a86870bff7c5..a171bf071273409aa42c15774abdf97bba43ec23 100644 --- a/mace/kernels/opencl/cl/eltwise.cl +++ b/mace/kernels/opencl/cl/eltwise.cl @@ -37,8 +37,7 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS out = in0 - in1; #endif -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, w, hb, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(w, hb), out); } diff --git a/mace/kernels/opencl/cl/fully_connected.cl b/mace/kernels/opencl/cl/fully_connected.cl index b913986a06fbbdfbfcf0b0f5a6a2ce08252efe0d..086d5e4ffea90d0874bca08ec983f1f0ef9af1a7 100644 --- a/mace/kernels/opencl/cl/fully_connected.cl +++ b/mace/kernels/opencl/cl/fully_connected.cl @@ -60,9 +60,8 @@ __kernel void fully_connected(KERNEL_ERROR_PARAMS result = do_activation(result, relux_max_limit); #endif -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_blk_idx, batch_idx, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result); } @@ -154,9 +153,8 @@ __kernel void fully_connected_width(KERNEL_ERROR_PARAMS result = do_activation(result, relux_max_limit); #endif -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_blk_idx, batch_idx, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result); } } diff --git a/mace/kernels/opencl/cl/matmul.cl b/mace/kernels/opencl/cl/matmul.cl index 1fa2b46f481d629f107959bf5f820cfdfc2a2f90..8641b4f5a52693d44c490c464e2dfec4ac404716 100644 --- a/mace/kernels/opencl/cl/matmul.cl +++ b/mace/kernels/opencl/cl/matmul.cl @@ -48,26 +48,18 @@ __kernel void matmul(KERNEL_ERROR_PARAMS c3 += (DATA_TYPE4)(dot(a0, b3), dot(a1, b3), dot(a2, b3), dot(a3, b3)); } -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(C, gx, gy, kernel_error); -#endif WRITE_IMAGET(C, (int2)(gx, gy), c0); if ((gx + 1) >= N) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(C, gx + 1, gy, kernel_error); -#endif WRITE_IMAGET(C, (int2)(gx + 1, gy), c1); if ((gx + 2) >= N) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(C, gx + 2, gy, kernel_error); -#endif WRITE_IMAGET(C, (int2)(gx + 2, gy), c2); if ((gx + 3) >= N) return; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(C, gx + 3, gy, kernel_error); -#endif WRITE_IMAGET(C, (int2)(gx + 3, gy), c3); } diff --git a/mace/kernels/opencl/cl/pooling.cl b/mace/kernels/opencl/cl/pooling.cl index c3abeb01c12441c4d5a01b3ea8c3d7753f050ec5..3ab5f62de2eaa7f856835c2fd1abe4c885e69981 100644 --- a/mace/kernels/opencl/cl/pooling.cl +++ b/mace/kernels/opencl/cl/pooling.cl @@ -96,8 +96,6 @@ __kernel void pooling(KERNEL_ERROR_PARAMS #endif const int pos = mad24(out_chan_idx, out_width, out_width_idx); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, pos, out_hb_idx, kernel_error); -#endif WRITE_IMAGET(output, (int2)(pos, out_hb_idx), res); } diff --git a/mace/kernels/opencl/cl/resize_bilinear.cl b/mace/kernels/opencl/cl/resize_bilinear.cl index 091fb617c120b4c3449e248391ca1221a133f23f..fb069c42fc5ce8215bcfd7a606df41a167fc4e65 100644 --- a/mace/kernels/opencl/cl/resize_bilinear.cl +++ b/mace/kernels/opencl/cl/resize_bilinear.cl @@ -58,9 +58,8 @@ __kernel void resize_bilinear_nocache(KERNEL_ERROR_PARAMS const int out_w_offset = mul24(ch_blk, out_width); const int out_h_offset = mul24(b, out_height); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_w_offset + w, out_h_offset + h, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(out_w_offset + w, out_h_offset + h), out); } diff --git a/mace/kernels/opencl/cl/slice.cl b/mace/kernels/opencl/cl/slice.cl index 0116fd91da6f6be92dc1a97ba683f79e000f10cf..c78e6dc826a8740f982bb1edb5a5fa15836a11ad 100644 --- a/mace/kernels/opencl/cl/slice.cl +++ b/mace/kernels/opencl/cl/slice.cl @@ -24,9 +24,6 @@ __kernel void slice(KERNEL_ERROR_PARAMS width, width_idx), hb_idx)); const int pos = mad24(chan_blk_idx, width, width_idx); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, pos, hb_idx, kernel_error); -#endif - WRITE_IMAGET(output, - (int2)(pos, hb_idx), data); + WRITE_IMAGET(output, (int2)(pos, hb_idx), data); } diff --git a/mace/kernels/opencl/cl/softmax.cl b/mace/kernels/opencl/cl/softmax.cl index ae434eb91c3a927ed1d7103da4cd5cf1b07cfa7c..ad554b542ca0e11b09ccf0a1e9475aeff0f27ef4 100644 --- a/mace/kernels/opencl/cl/softmax.cl +++ b/mace/kernels/opencl/cl/softmax.cl @@ -85,8 +85,7 @@ __kernel void softmax(KERNEL_ERROR_PARAMS data = native_exp(data) / sum; } -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, pos, hb_idx, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(pos, hb_idx), data); } diff --git a/mace/kernels/opencl/cl/space_to_batch.cl b/mace/kernels/opencl/cl/space_to_batch.cl index dcf927787c9204bf04c379639032bd4a5a37953b..46764a08fc2f7d6fcd813eb5daf48f1527ea7ed0 100644 --- a/mace/kernels/opencl/cl/space_to_batch.cl +++ b/mace/kernels/opencl/cl/space_to_batch.cl @@ -46,9 +46,8 @@ __kernel void space_to_batch(KERNEL_ERROR_PARAMS int2 batch_coord = (int2)(mul24(chan_idx, batch_width) + batch_w_idx, batch_hb_idx); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(batch_data, batch_coord.x, batch_coord.y, kernel_error); -#endif + WRITE_IMAGET(batch_data, batch_coord, value); } @@ -94,9 +93,8 @@ __kernel void batch_to_space(KERNEL_ERROR_PARAMS int2 space_coord = (int2)(mul24(chan_idx, space_width) + space_w_idx, space_b_idx * space_height + space_h_idx); -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(space_data, space_coord.x, space_coord.y, kernel_error); -#endif + WRITE_IMAGET(space_data, space_coord, value); } } diff --git a/mace/kernels/opencl/cl/winograd_transform.cl b/mace/kernels/opencl/cl/winograd_transform.cl index aac11e97b9db161f33283430942ce8015582324a..23d4bc6fd2de3e42150c2e607e541b37aee8b0bd 100644 --- a/mace/kernels/opencl/cl/winograd_transform.cl +++ b/mace/kernels/opencl/cl/winograd_transform.cl @@ -94,9 +94,8 @@ __kernel void winograd_transform_2x2(KERNEL_ERROR_PARAMS input3[2] = tv3[2] - tv3[1]; input3[3] = tv3[1] - tv3[3]; -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, out_width_idx, chan_blk_idx + chan_blk_idx * 15, kernel_error); -#endif + #pragma unroll for (short i = 0; i < 4; ++i) { WRITE_IMAGET(output, (int2)(out_width_idx, chan_blk_idx), input0[i]); @@ -213,30 +212,23 @@ __kernel void winograd_inverse_transform_2x2(KERNEL_ERROR_PARAMS in1[1] = do_activation(in1[1], relux_max_limit); #endif -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, coord_x, coord_y, kernel_error); -#endif + WRITE_IMAGET(output, (int2)(coord_x, coord_y), in0[0]); t = 0; if (out_width_idx + 1 < out_width) { -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, coord_x + 1, coord_y, kernel_error); -#endif WRITE_IMAGET(output, (int2)(coord_x + 1, coord_y), in0[1]); t += 1; } if (out_height_idx + 1 < out_height) { -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, coord_x, coord_y + 1, kernel_error); -#endif WRITE_IMAGET(output, (int2)(coord_x, coord_y + 1), in1[0]); t += 1; } if (t == 2) { -#ifdef OUT_OF_RANGE_CHECK check_out_of_range_for_image2d(output, coord_x + 1, coord_y + 1, kernel_error); -#endif WRITE_IMAGET(output, (int2)(coord_x + 1, coord_y + 1), in1[1]); }