diff --git a/src/operators/kernel/cl/cl_kernel/relu.cl b/src/operators/kernel/cl/cl_kernel/relu.cl index 20714202a6745bcb4f1810c20665a20e387e665d..da6f2d0faf823bb899b4f2097c2cdac4d76b8499 100644 --- a/src/operators/kernel/cl/cl_kernel/relu.cl +++ b/src/operators/kernel/cl/cl_kernel/relu.cl @@ -1,4 +1,16 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma OPENCL EXTENSION cl_khr_fp16 : enable __kernel void relu(__read_only image2d_t input, @@ -12,6 +24,6 @@ __kernel void relu(__read_only image2d_t input, CLK_FILTER_NEAREST; half4 in = read_imageh(input, sampler, (int2)(x, y)); - in = max((half4)(0.0), in); + in = max((half4)(0.0f,0.0f,0.0f,0.0f), in); write_imageh(output, (int2)(x, y), in); } \ No newline at end of file diff --git a/src/operators/kernel/cl/cl_kernel/reshape.cl b/src/operators/kernel/cl/cl_kernel/reshape.cl index 4055445d1576b2ca54919ed03ad187d08cff14c2..062ba55de0699c6baa07cf4c33c1cd13f59b592a 100644 --- a/src/operators/kernel/cl/cl_kernel/reshape.cl +++ b/src/operators/kernel/cl/cl_kernel/reshape.cl @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + __kernel void reshape(__read_only image2d_t input, __write_only image2d_t output, __private const int d0, @@ -36,14 +38,14 @@ __kernel void reshape(__read_only image2d_t input, int t = obx * 4 + i; if (t > x1) break; int oindex = oby * x1 * x2 * x3 + t * x2 * x3 + ox * x3 + oy; - int i0, i1, i2, i3; int i3 = oindex % d3; oindex /= d3; int i2 = oindex % d2; oindex /= d2; int i1 = oindex % d1; oindex /= d1; int i0 = oindex; int ix = (i1 / 4) * d3 + i3; int iy = i0 * d2 + i2; - r[i] = read_imageh(input, sampler, int2(ix, iy))[i1%4]; + half4 p = read_imageh(input, sampler, (int2)(ix, iy)); + ((half*)&r)[i] = ((half*)&p)[i1%4]; } - write_imageh(output, int2(x, y), r); -} \ No newline at end of file + write_imageh(output, (int2)(x, y), r); +}