// fetch_kernel.cl
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

/*
 * fetch: copy one half4 texel of `input` into four float slots of `out`.
 *
 * Work-item mapping (taken from the global ids):
 *   get_global_id(0) -> in_c   (index of the texel column block)
 *   get_global_id(1) -> in_w   (x position inside that block)
 *   get_global_id(2) -> in_nh  (image row; split into in_n / in_h by in_height)
 *
 * The texel is read at (in_c * in_width + in_w, in_nh). Its x/y/z/w lanes are
 * written to out[index + k * size_ch] for k = 0..3, where
 *   index = in_n * size_batch + in_c * size_block + in_h * in_width + in_w.
 *
 * Parameters:
 *   in_height  - divisor used to split the row id into (in_n, in_h).
 *   in_width   - width of one block along x, also the row stride in `out`.
 *   input      - source image, read with read_imageh.
 *   out        - destination float buffer.
 *   size_ch    - element stride between the four lanes of one texel
 *                (presumably in_height * in_width; confirm against the host).
 *   size_block - element stride per in_c step
 *                (presumably 4 * size_ch; confirm against the host).
 *   size_batch - element stride per in_n step
 *                (presumably channels * size_ch; confirm against the host).
 *
 * NOTE(review): there are no bounds checks; the host is assumed to launch an
 * exact global size and to size `out` for every index written here.
 */
__kernel void fetch(__private const int in_height,
                    __private const int in_width,
                    __read_only image2d_t input,
                    __global float* out,
                    __private const int size_ch,
                    __private const int size_block,
                    __private const int size_batch) {
  const int in_c = get_global_id(0);
  const int in_w = get_global_id(1);
  const int in_nh = get_global_id(2);
  const int in_n = in_nh / in_height;
  const int in_h = in_nh % in_height;

  // Integer-coordinate image reads are only defined for samplers that use
  // unnormalized coordinates with nearest filtering, so the sampler must be
  // declared with CLK_NORMALIZED_COORDS_FALSE.
  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  // x coordinate of the texel: in_c * in_width + in_w.
  const int pos_x = mad24(in_c, in_width, in_w);
  const half4 pixel = read_imageh(input, sampler, (int2)(pos_x, in_nh));

  // Scatter the four lanes, size_ch elements apart, starting at `index`.
  const int index =
      in_n * size_batch + in_c * size_block + in_h * in_width + in_w;
  out[index] = convert_float(pixel.x);
  out[index + size_ch] = convert_float(pixel.y);
  out[index + size_ch * 2] = convert_float(pixel.z);
  out[index + size_ch * 3] = convert_float(pixel.w);
}
Z
zhaojiaying01 已提交
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46

/*
 * fetch_2d: copy one half4 texel of `input` into four consecutive floats.
 *
 * Work-item mapping (taken from the global ids):
 *   get_global_id(1) -> in_w  (texel x coordinate)
 *   get_global_id(2) -> in_h  (texel y coordinate)
 * get_global_id(0) is not read by this kernel.
 *
 * The texel at (in_w, in_h) is converted to float and its x/y/z/w lanes are
 * stored at out[index .. index + 3], with index = (in_h * in_width + in_w) * 4.
 *
 * Parameters:
 *   in_height - not used by the kernel body.
 *   in_width  - row stride, in texels, used to compute the output index.
 *   input     - source image, read with read_imageh.
 *   out       - destination float buffer.
 *
 * NOTE(review): there are no bounds checks; the host is assumed to launch an
 * exact global size and to size `out` for every index written here.
 */
__kernel void fetch_2d(__private const int in_height,
                       __private const int in_width,
                       __read_only image2d_t input,
                       __global float* out) {
  const int in_w = get_global_id(1);
  const int in_h = get_global_id(2);

  // Integer-coordinate image reads are only defined for samplers that use
  // unnormalized coordinates with nearest filtering, so the sampler must be
  // declared with CLK_NORMALIZED_COORDS_FALSE.
  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  const half4 pixel = read_imageh(input, sampler, (int2)(in_w, in_h));

  // Four floats per texel, stored contiguously.
  const int index = (in_h * in_width + in_w) * 4;
  out[index] = convert_float(pixel.x);
  out[index + 1] = convert_float(pixel.y);
  out[index + 2] = convert_float(pixel.z);
  out[index + 3] = convert_float(pixel.w);
}