提交 72ef44c8 编写于 作者: Y yejianwu

fix out of range check error

上级 c1d3bcdc
...@@ -31,7 +31,7 @@ __kernel void activation(KERNEL_ERROR_PARAMS ...@@ -31,7 +31,7 @@ __kernel void activation(KERNEL_ERROR_PARAMS
DATA_TYPE4 out = do_activation(in, relux_max_limit); DATA_TYPE4 out = do_activation(in, relux_max_limit);
#endif #endif
check_out_of_range_for_image2d(output, pos, hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb, kernel_error);
WRITE_IMAGET(output, (int2)(pos, hb), out); WRITE_IMAGET(output, (int2)(pos, hb), out);
} }
...@@ -32,7 +32,7 @@ __kernel void addn(KERNEL_ERROR_PARAMS ...@@ -32,7 +32,7 @@ __kernel void addn(KERNEL_ERROR_PARAMS
out = out + in3; out = out + in3;
#endif #endif
check_out_of_range_for_image2d(output, w, hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, hb, kernel_error);
WRITE_IMAGET(output, (int2)(w, hb), out); WRITE_IMAGET(output, (int2)(w, hb), out);
} }
...@@ -49,7 +49,7 @@ __kernel void batch_norm(KERNEL_ERROR_PARAMS ...@@ -49,7 +49,7 @@ __kernel void batch_norm(KERNEL_ERROR_PARAMS
out = do_activation(out, relux_max_limit); out = do_activation(out, relux_max_limit);
#endif #endif
check_out_of_range_for_image2d(output, pos, hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb, kernel_error);
WRITE_IMAGET(output, (int2)(pos, hb), out); WRITE_IMAGET(output, (int2)(pos, hb), out);
} }
...@@ -24,7 +24,7 @@ __kernel void bias_add(KERNEL_ERROR_PARAMS ...@@ -24,7 +24,7 @@ __kernel void bias_add(KERNEL_ERROR_PARAMS
DATA_TYPE4 bias_value = READ_IMAGET(bias, SAMPLER, (int2)(ch_blk, 0)); DATA_TYPE4 bias_value = READ_IMAGET(bias, SAMPLER, (int2)(ch_blk, 0));
DATA_TYPE4 out = in + bias_value; DATA_TYPE4 out = in + bias_value;
check_out_of_range_for_image2d(output, pos, hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb, kernel_error);
WRITE_IMAGET(output, (int2)(pos, hb), out); WRITE_IMAGET(output, (int2)(pos, hb), out);
} }
...@@ -50,7 +50,7 @@ __kernel void filter_buffer_to_image(KERNEL_ERROR_PARAMS ...@@ -50,7 +50,7 @@ __kernel void filter_buffer_to_image(KERNEL_ERROR_PARAMS
} }
int2 coord = (int2)(w, h); int2 coord = (int2)(w, h);
check_out_of_range_for_image2d(output, w, h, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error);
WRITE_IMAGET(output, coord, values); WRITE_IMAGET(output, coord, values);
} }
...@@ -158,7 +158,7 @@ __kernel void dw_filter_buffer_to_image(KERNEL_ERROR_PARAMS ...@@ -158,7 +158,7 @@ __kernel void dw_filter_buffer_to_image(KERNEL_ERROR_PARAMS
} }
int2 coord = (int2)(w, h); int2 coord = (int2)(w, h);
check_out_of_range_for_image2d(output, w, h, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error);
WRITE_IMAGET(output, coord, values); WRITE_IMAGET(output, coord, values);
} }
...@@ -201,7 +201,7 @@ __kernel void in_out_buffer_to_image(KERNEL_ERROR_PARAMS ...@@ -201,7 +201,7 @@ __kernel void in_out_buffer_to_image(KERNEL_ERROR_PARAMS
values = vload4(0, input + offset); values = vload4(0, input + offset);
} }
int2 coord = (int2)(w, h); int2 coord = (int2)(w, h);
check_out_of_range_for_image2d(output, w, h, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error);
WRITE_IMAGET(output, coord, values); WRITE_IMAGET(output, coord, values);
} }
...@@ -278,7 +278,7 @@ __kernel void arg_buffer_to_image(KERNEL_ERROR_PARAMS ...@@ -278,7 +278,7 @@ __kernel void arg_buffer_to_image(KERNEL_ERROR_PARAMS
values = vload4(0, input + offset); values = vload4(0, input + offset);
} }
int2 coord = (int2)(w, h); int2 coord = (int2)(w, h);
check_out_of_range_for_image2d(output, w, h, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error);
WRITE_IMAGET(output, coord, values); WRITE_IMAGET(output, coord, values);
} }
...@@ -356,7 +356,7 @@ __kernel void in_out_height_buffer_to_image(KERNEL_ERROR_PARAMS ...@@ -356,7 +356,7 @@ __kernel void in_out_height_buffer_to_image(KERNEL_ERROR_PARAMS
values.x = *(input + offset); values.x = *(input + offset);
} }
int2 coord = (int2)(w, h); int2 coord = (int2)(w, h);
check_out_of_range_for_image2d(output, w, h, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error);
WRITE_IMAGET(output, coord, values); WRITE_IMAGET(output, coord, values);
} }
...@@ -438,7 +438,7 @@ __kernel void in_out_width_buffer_to_image(KERNEL_ERROR_PARAMS ...@@ -438,7 +438,7 @@ __kernel void in_out_width_buffer_to_image(KERNEL_ERROR_PARAMS
values.x = *(input + offset); values.x = *(input + offset);
} }
int2 coord = (int2)(w, h); int2 coord = (int2)(w, h);
check_out_of_range_for_image2d(output, w, h, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, h, kernel_error);
WRITE_IMAGET(output, coord, values); WRITE_IMAGET(output, coord, values);
} }
...@@ -513,7 +513,7 @@ __kernel void winograd_filter_buffer_to_image(KERNEL_ERROR_PARAMS ...@@ -513,7 +513,7 @@ __kernel void winograd_filter_buffer_to_image(KERNEL_ERROR_PARAMS
int2 coord = (int2)(w, h); int2 coord = (int2)(w, h);
check_out_of_range_for_image2d(output, coord.x, coord.y + out_channels * 15, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, coord.x, coord.y + out_channels * 15, kernel_error);
#pragma unroll #pragma unroll
for (short i = 0; i < 4; ++i) { for (short i = 0; i < 4; ++i) {
......
...@@ -51,7 +51,7 @@ __kernel void channel_shuffle(KERNEL_ERROR_PARAMS ...@@ -51,7 +51,7 @@ __kernel void channel_shuffle(KERNEL_ERROR_PARAMS
int out_x = mad24(mad24(group_chan_blk_idx, groups, g_blk), width, width_idx); int out_x = mad24(mad24(group_chan_blk_idx, groups, g_blk), width, width_idx);
check_out_of_range_for_image2d(output, out_x + groups_blks_width * 3, hb_idx, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x + groups_blks_width * 3, hb_idx, kernel_error);
WRITE_IMAGET(output, (int2)(out_x, hb_idx), out_chan_data0); WRITE_IMAGET(output, (int2)(out_x, hb_idx), out_chan_data0);
out_x += groups_blks_width; out_x += groups_blks_width;
......
...@@ -47,6 +47,13 @@ ...@@ -47,6 +47,13 @@
#endif #endif
#ifdef OUT_OF_RANGE_CHECK
#define CHECK_OUT_OF_RANGE_FOR_IMAGE2D(image, x, y, kernel_error) \
check_out_of_range_for_image2d(image, x, y, kernel_error)
#else
#define CHECK_OUT_OF_RANGE_FOR_IMAGE2D(image, x, y, kernel_error)
#endif
__constant sampler_t SAMPLER = __constant sampler_t SAMPLER =
CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
......
...@@ -82,7 +82,7 @@ __kernel void concat_channel(KERNEL_ERROR_PARAMS ...@@ -82,7 +82,7 @@ __kernel void concat_channel(KERNEL_ERROR_PARAMS
const int pos = mad24(chan_blk_idx, width, width_idx); const int pos = mad24(chan_blk_idx, width, width_idx);
check_out_of_range_for_image2d(output, pos, hb_idx, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb_idx, kernel_error);
WRITE_IMAGET(output, (int2)(pos, hb_idx), data); WRITE_IMAGET(output, (int2)(pos, hb_idx), data);
} }
...@@ -114,7 +114,7 @@ __kernel void concat_channel_multi(KERNEL_ERROR_PARAMS ...@@ -114,7 +114,7 @@ __kernel void concat_channel_multi(KERNEL_ERROR_PARAMS
const int pos = mad24(chan_blk_idx + chan_blk_offset, width, width_idx); const int pos = mad24(chan_blk_idx + chan_blk_offset, width, width_idx);
check_out_of_range_for_image2d(output, pos, hb_idx, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb_idx, kernel_error);
WRITE_IMAGET(output, (int2)(pos, hb_idx), data); WRITE_IMAGET(output, (int2)(pos, hb_idx), data);
} }
......
...@@ -129,22 +129,22 @@ __kernel void conv_2d(KERNEL_ERROR_PARAMS ...@@ -129,22 +129,22 @@ __kernel void conv_2d(KERNEL_ERROR_PARAMS
const int out_x_base = mul24(out_ch_blk, out_width); const int out_x_base = mul24(out_ch_blk, out_width);
int w = out_w_blk; int w = out_w_blk;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0);
w += out_w_blks; w += out_w_blks;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1);
w += out_w_blks; w += out_w_blks;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2);
w += out_w_blks; w += out_w_blks;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3);
} }
...@@ -105,21 +105,21 @@ __kernel void conv_2d_1x1(KERNEL_ERROR_PARAMS ...@@ -105,21 +105,21 @@ __kernel void conv_2d_1x1(KERNEL_ERROR_PARAMS
const int out_x_base = mul24(out_ch_blk, width); const int out_x_base = mul24(out_ch_blk, width);
int out_x_idx = out_w_blk; int out_x_idx = out_w_blk;
check_out_of_range_for_image2d(output, out_x_base + out_x_idx, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + out_x_idx, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out0); WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out0);
out_x_idx += out_w_blks; out_x_idx += out_w_blks;
if (out_x_idx >= width) return; if (out_x_idx >= width) return;
check_out_of_range_for_image2d(output, out_x_base + out_x_idx, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + out_x_idx, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out1); WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out1);
out_x_idx += out_w_blks; out_x_idx += out_w_blks;
if (out_x_idx >= width) return; if (out_x_idx >= width) return;
check_out_of_range_for_image2d(output, out_x_base + out_x_idx, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + out_x_idx, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out2); WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out2);
out_x_idx += out_w_blks; out_x_idx += out_w_blks;
if (out_x_idx >= width) return; if (out_x_idx >= width) return;
check_out_of_range_for_image2d(output, out_x_base + out_x_idx, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + out_x_idx, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out3); WRITE_IMAGET(output, (int2)(out_x_base + out_x_idx, out_hb), out3);
} }
...@@ -136,35 +136,35 @@ __kernel void conv_2d_3x3(KERNEL_ERROR_PARAMS ...@@ -136,35 +136,35 @@ __kernel void conv_2d_3x3(KERNEL_ERROR_PARAMS
const int out_x_base = mul24(out_ch_blk, out_width); const int out_x_base = mul24(out_ch_blk, out_width);
int w = out_w_blk; int w = out_w_blk;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, WRITE_IMAGET(output,
(int2)(out_x_base + w, out_hb), (int2)(out_x_base + w, out_hb),
out0); out0);
w += out_w_blks; w += out_w_blks;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, WRITE_IMAGET(output,
(int2)(out_x_base + w, out_hb), (int2)(out_x_base + w, out_hb),
out1); out1);
w += out_w_blks; w += out_w_blks;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, WRITE_IMAGET(output,
(int2)(out_x_base + w, out_hb), (int2)(out_x_base + w, out_hb),
out2); out2);
w += out_w_blks; w += out_w_blks;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, WRITE_IMAGET(output,
(int2)(out_x_base + w, out_hb), (int2)(out_x_base + w, out_hb),
out3); out3);
w += out_w_blks; w += out_w_blks;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, WRITE_IMAGET(output,
(int2)(out_x_base + w, out_hb), (int2)(out_x_base + w, out_hb),
out4); out4);
......
...@@ -44,7 +44,7 @@ __kernel void cwise(KERNEL_ERROR_PARAMS ...@@ -44,7 +44,7 @@ __kernel void cwise(KERNEL_ERROR_PARAMS
out.w = fabs(in0.w); out.w = fabs(in0.w);
#endif #endif
check_out_of_range_for_image2d(output, w, hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, hb, kernel_error);
WRITE_IMAGET(output, (int2)(w, hb), out); WRITE_IMAGET(output, (int2)(w, hb), out);
} }
...@@ -37,7 +37,7 @@ __kernel void depth_to_space(KERNEL_ERROR_PARAMS ...@@ -37,7 +37,7 @@ __kernel void depth_to_space(KERNEL_ERROR_PARAMS
const int in_pos = mad24(in_d, input_width, in_w); const int in_pos = mad24(in_d, input_width, in_w);
DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, in_hb)); DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, in_hb));
check_out_of_range_for_image2d(output, out_pos, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_pos, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_pos, out_hb), in_data); WRITE_IMAGET(output, (int2)(out_pos, out_hb), in_data);
} }
...@@ -79,7 +79,7 @@ __kernel void space_to_depth(KERNEL_ERROR_PARAMS ...@@ -79,7 +79,7 @@ __kernel void space_to_depth(KERNEL_ERROR_PARAMS
const int out_pos = mad24(out_d, output_width, out_w); const int out_pos = mad24(out_d, output_width, out_w);
DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, hb)); DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, hb));
check_out_of_range_for_image2d(output, out_pos, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_pos, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_pos, out_hb), in_data); WRITE_IMAGET(output, (int2)(out_pos, out_hb), in_data);
} }
...@@ -123,22 +123,22 @@ __kernel void depthwise_conv2d(KERNEL_ERROR_PARAMS ...@@ -123,22 +123,22 @@ __kernel void depthwise_conv2d(KERNEL_ERROR_PARAMS
const short out_x_base = mul24(out_ch_blk, out_width); const short out_x_base = mul24(out_ch_blk, out_width);
short w = out_w_blk; short w = out_w_blk;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0);
w += out_w_blks; w += out_w_blks;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1);
w += out_w_blks; w += out_w_blks;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2);
w += out_w_blks; w += out_w_blks;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3);
} }
...@@ -253,21 +253,21 @@ __kernel void depthwise_conv2d_s1(KERNEL_ERROR_PARAMS ...@@ -253,21 +253,21 @@ __kernel void depthwise_conv2d_s1(KERNEL_ERROR_PARAMS
const short out_x_base = mul24(out_ch_blk, out_width); const short out_x_base = mul24(out_ch_blk, out_width);
short w = out_w_blk; short w = out_w_blk;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out0);
w += 1; w += 1;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out1);
w += 1; w += 1;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out2);
w += 1; w += 1;
if (w >= out_width) return; if (w >= out_width) return;
check_out_of_range_for_image2d(output, out_x_base + w, out_hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_x_base + w, out_hb, kernel_error);
WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3); WRITE_IMAGET(output, (int2)(out_x_base + w, out_hb), out3);
} }
...@@ -37,7 +37,7 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS ...@@ -37,7 +37,7 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS
out = in0 - in1; out = in0 - in1;
#endif #endif
check_out_of_range_for_image2d(output, w, hb, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, w, hb, kernel_error);
WRITE_IMAGET(output, (int2)(w, hb), out); WRITE_IMAGET(output, (int2)(w, hb), out);
} }
...@@ -60,7 +60,7 @@ __kernel void fully_connected(KERNEL_ERROR_PARAMS ...@@ -60,7 +60,7 @@ __kernel void fully_connected(KERNEL_ERROR_PARAMS
result = do_activation(result, relux_max_limit); result = do_activation(result, relux_max_limit);
#endif #endif
check_out_of_range_for_image2d(output, out_blk_idx, batch_idx, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_blk_idx, batch_idx, kernel_error);
WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result); WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result);
} }
...@@ -153,7 +153,7 @@ __kernel void fully_connected_width(KERNEL_ERROR_PARAMS ...@@ -153,7 +153,7 @@ __kernel void fully_connected_width(KERNEL_ERROR_PARAMS
result = do_activation(result, relux_max_limit); result = do_activation(result, relux_max_limit);
#endif #endif
check_out_of_range_for_image2d(output, out_blk_idx, batch_idx, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_blk_idx, batch_idx, kernel_error);
WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result); WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result);
} }
......
...@@ -48,18 +48,18 @@ __kernel void matmul(KERNEL_ERROR_PARAMS ...@@ -48,18 +48,18 @@ __kernel void matmul(KERNEL_ERROR_PARAMS
c3 += (DATA_TYPE4)(dot(a0, b3), dot(a1, b3), dot(a2, b3), dot(a3, b3)); c3 += (DATA_TYPE4)(dot(a0, b3), dot(a1, b3), dot(a2, b3), dot(a3, b3));
} }
check_out_of_range_for_image2d(C, gx, gy, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(C, gx, gy, kernel_error);
WRITE_IMAGET(C, (int2)(gx, gy), c0); WRITE_IMAGET(C, (int2)(gx, gy), c0);
if ((gx + 1) >= N) return; if ((gx + 1) >= N) return;
check_out_of_range_for_image2d(C, gx + 1, gy, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(C, gx + 1, gy, kernel_error);
WRITE_IMAGET(C, (int2)(gx + 1, gy), c1); WRITE_IMAGET(C, (int2)(gx + 1, gy), c1);
if ((gx + 2) >= N) return; if ((gx + 2) >= N) return;
check_out_of_range_for_image2d(C, gx + 2, gy, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(C, gx + 2, gy, kernel_error);
WRITE_IMAGET(C, (int2)(gx + 2, gy), c2); WRITE_IMAGET(C, (int2)(gx + 2, gy), c2);
if ((gx + 3) >= N) return; if ((gx + 3) >= N) return;
check_out_of_range_for_image2d(C, gx + 3, gy, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(C, gx + 3, gy, kernel_error);
WRITE_IMAGET(C, (int2)(gx + 3, gy), c3); WRITE_IMAGET(C, (int2)(gx + 3, gy), c3);
} }
...@@ -96,6 +96,6 @@ __kernel void pooling(KERNEL_ERROR_PARAMS ...@@ -96,6 +96,6 @@ __kernel void pooling(KERNEL_ERROR_PARAMS
#endif #endif
const int pos = mad24(out_chan_idx, out_width, out_width_idx); const int pos = mad24(out_chan_idx, out_width, out_width_idx);
check_out_of_range_for_image2d(output, pos, out_hb_idx, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, out_hb_idx, kernel_error);
WRITE_IMAGET(output, (int2)(pos, out_hb_idx), res); WRITE_IMAGET(output, (int2)(pos, out_hb_idx), res);
} }
...@@ -58,7 +58,7 @@ __kernel void resize_bilinear_nocache(KERNEL_ERROR_PARAMS ...@@ -58,7 +58,7 @@ __kernel void resize_bilinear_nocache(KERNEL_ERROR_PARAMS
const int out_w_offset = mul24(ch_blk, out_width); const int out_w_offset = mul24(ch_blk, out_width);
const int out_h_offset = mul24(b, out_height); const int out_h_offset = mul24(b, out_height);
check_out_of_range_for_image2d(output, out_w_offset + w, out_h_offset + h, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_w_offset + w, out_h_offset + h, kernel_error);
WRITE_IMAGET(output, (int2)(out_w_offset + w, out_h_offset + h), out); WRITE_IMAGET(output, (int2)(out_w_offset + w, out_h_offset + h), out);
} }
......
...@@ -24,6 +24,6 @@ __kernel void slice(KERNEL_ERROR_PARAMS ...@@ -24,6 +24,6 @@ __kernel void slice(KERNEL_ERROR_PARAMS
width, width_idx), hb_idx)); width, width_idx), hb_idx));
const int pos = mad24(chan_blk_idx, width, width_idx); const int pos = mad24(chan_blk_idx, width, width_idx);
check_out_of_range_for_image2d(output, pos, hb_idx, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb_idx, kernel_error);
WRITE_IMAGET(output, (int2)(pos, hb_idx), data); WRITE_IMAGET(output, (int2)(pos, hb_idx), data);
} }
...@@ -85,7 +85,7 @@ __kernel void softmax(KERNEL_ERROR_PARAMS ...@@ -85,7 +85,7 @@ __kernel void softmax(KERNEL_ERROR_PARAMS
data = native_exp(data) / sum; data = native_exp(data) / sum;
} }
check_out_of_range_for_image2d(output, pos, hb_idx, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, pos, hb_idx, kernel_error);
WRITE_IMAGET(output, (int2)(pos, hb_idx), data); WRITE_IMAGET(output, (int2)(pos, hb_idx), data);
} }
...@@ -46,7 +46,7 @@ __kernel void space_to_batch(KERNEL_ERROR_PARAMS ...@@ -46,7 +46,7 @@ __kernel void space_to_batch(KERNEL_ERROR_PARAMS
int2 batch_coord = (int2)(mul24(chan_idx, batch_width) + batch_w_idx, batch_hb_idx); int2 batch_coord = (int2)(mul24(chan_idx, batch_width) + batch_w_idx, batch_hb_idx);
check_out_of_range_for_image2d(batch_data, batch_coord.x, batch_coord.y, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(batch_data, batch_coord.x, batch_coord.y, kernel_error);
WRITE_IMAGET(batch_data, batch_coord, value); WRITE_IMAGET(batch_data, batch_coord, value);
} }
...@@ -93,7 +93,7 @@ __kernel void batch_to_space(KERNEL_ERROR_PARAMS ...@@ -93,7 +93,7 @@ __kernel void batch_to_space(KERNEL_ERROR_PARAMS
int2 space_coord = (int2)(mul24(chan_idx, space_width) + space_w_idx, int2 space_coord = (int2)(mul24(chan_idx, space_width) + space_w_idx,
space_b_idx * space_height + space_h_idx); space_b_idx * space_height + space_h_idx);
check_out_of_range_for_image2d(space_data, space_coord.x, space_coord.y, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(space_data, space_coord.x, space_coord.y, kernel_error);
WRITE_IMAGET(space_data, space_coord, value); WRITE_IMAGET(space_data, space_coord, value);
} }
......
...@@ -94,7 +94,7 @@ __kernel void winograd_transform_2x2(KERNEL_ERROR_PARAMS ...@@ -94,7 +94,7 @@ __kernel void winograd_transform_2x2(KERNEL_ERROR_PARAMS
input3[2] = tv3[2] - tv3[1]; input3[2] = tv3[2] - tv3[1];
input3[3] = tv3[1] - tv3[3]; input3[3] = tv3[1] - tv3[3];
check_out_of_range_for_image2d(output, out_width_idx, chan_blk_idx + chan_blk_idx * 15, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, out_width_idx, chan_blk_idx + chan_blk_idx * 15, kernel_error);
#pragma unroll #pragma unroll
for (short i = 0; i < 4; ++i) { for (short i = 0; i < 4; ++i) {
...@@ -212,23 +212,23 @@ __kernel void winograd_inverse_transform_2x2(KERNEL_ERROR_PARAMS ...@@ -212,23 +212,23 @@ __kernel void winograd_inverse_transform_2x2(KERNEL_ERROR_PARAMS
in1[1] = do_activation(in1[1], relux_max_limit); in1[1] = do_activation(in1[1], relux_max_limit);
#endif #endif
check_out_of_range_for_image2d(output, coord_x, coord_y, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, coord_x, coord_y, kernel_error);
WRITE_IMAGET(output, (int2)(coord_x, coord_y), in0[0]); WRITE_IMAGET(output, (int2)(coord_x, coord_y), in0[0]);
t = 0; t = 0;
if (out_width_idx + 1 < out_width) { if (out_width_idx + 1 < out_width) {
check_out_of_range_for_image2d(output, coord_x + 1, coord_y, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, coord_x + 1, coord_y, kernel_error);
WRITE_IMAGET(output, (int2)(coord_x + 1, coord_y), in0[1]); WRITE_IMAGET(output, (int2)(coord_x + 1, coord_y), in0[1]);
t += 1; t += 1;
} }
if (out_height_idx + 1 < out_height) { if (out_height_idx + 1 < out_height) {
check_out_of_range_for_image2d(output, coord_x, coord_y + 1, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, coord_x, coord_y + 1, kernel_error);
WRITE_IMAGET(output, (int2)(coord_x, coord_y + 1), in1[0]); WRITE_IMAGET(output, (int2)(coord_x, coord_y + 1), in1[0]);
t += 1; t += 1;
} }
if (t == 2) { if (t == 2) {
check_out_of_range_for_image2d(output, coord_x + 1, coord_y + 1, kernel_error); CHECK_OUT_OF_RANGE_FOR_IMAGE2D(output, coord_x + 1, coord_y + 1, kernel_error);
WRITE_IMAGET(output, (int2)(coord_x + 1, coord_y + 1), in1[1]); WRITE_IMAGET(output, (int2)(coord_x + 1, coord_y + 1), in1[1]);
} }
......
...@@ -70,7 +70,6 @@ else ...@@ -70,7 +70,6 @@ else
ADB_CMD_STR="LD_LIBRARY_PATH=${PHONE_DATA_DIR} \ ADB_CMD_STR="LD_LIBRARY_PATH=${PHONE_DATA_DIR} \
MACE_TUNING=${tuning_flag} \ MACE_TUNING=${tuning_flag} \
MACE_OUT_OF_RANGE_CHECK="1" \
MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \ MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \ MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \ MACE_KERNEL_PATH=$KERNEL_DIR \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册