diff --git a/src/operators/kernel/cl/cl_kernel/common.h b/src/operators/kernel/cl/cl_kernel/common.h
new file mode 100644
index 0000000000000000000000000000000000000000..80d90e25ba91443768e488be6db24820edd1a083
--- /dev/null
+++ b/src/operators/kernel/cl/cl_kernel/common.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+/*
+
+// Draft helper, still disabled by the enclosing block comment.
+// Applies the activation selected at build time to each lane of `in`:
+//   PRELU: lanes >= 0 pass through, the others are scaled by prelu_alpha.
+//   RELU:  every lane is clamped below at zero; wins if both are defined.
+// With neither macro defined the input is returned unchanged.
+inline half4 activation(half4 in
+#ifdef PRELU
+  ,half4 prelu_alpha
+#endif
+  ) {
+  half4 output = in;
+#ifdef PRELU
+  output = select(prelu_alpha * in, in, in >= (half4)0.0);
+#endif
+
+#ifdef RELU
+  output = fmax(in, (half4)0.0);
+#endif
+  return output;
+}
+
+*/
+
diff --git a/src/operators/kernel/cl/cl_kernel/conv_kernel.cl b/src/operators/kernel/cl/cl_kernel/conv_kernel.cl
index 71bd1d9ceec4091276d9143d7ad1913371ccbad1..a1f6df0e7b43fbe3fe9495a008f37accc95586af 100644
--- a/src/operators/kernel/cl/cl_kernel/conv_kernel.cl
+++ b/src/operators/kernel/cl/cl_kernel/conv_kernel.cl
@@ -1,7 +1,162 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
 
+    http://www.apache.org/licenses/LICENSE-2.0
 
-__kernel void conv_3x3(__global float* in, __global float* out) {
-    int num = get_global_id(0);
-    out[num] = in[num] * 0.1 + 102;
-  }
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#if 0  // Draft kernels: compiled out until they are finished and tested.
+
+#include "common.h"
+
+// 1x1 convolution on half-precision images; one work-item per output texel.
+//
+// The accumulator starts from the bias texel at (out_c, 0). For each of the
+// input_c channel blocks (laid side by side along x, input_width texels
+// each) the input texel is dotted with four filter texels, one per output
+// lane. A position that falls outside one block contributes nothing.
+//
+//   global_size_dim0/2  not read by the kernel body
+//   global_size_dim1    x pitch between channel blocks of output_image
+//   input               source image, read at (i * input_width + x, y)
+//   filter              weights, read at (i, out_c * 4 + lane)
+//   bias                initial accumulator, read at (out_c, 0)
+//   output_image        written at (out_c * global_size_dim1 + out_w, out_nh)
+//   stride, offset      input position = output position * stride + offset
+//   input_c             number of channel blocks accumulated
+//   input_width/height  extent of one channel block, used for bounds checks
+//
+// NOTE(review): activation() is still commented out in common.h, so a RELU
+// build of this file cannot compile until that helper is enabled.
+__kernel void conv_1x1(__private const int global_size_dim0,
+                       __private const int global_size_dim1,
+                       __private const int global_size_dim2,
+                       __read_only image2d_t input,
+                       __read_only image2d_t filter,
+                       __read_only image2d_t bias,
+                       __write_only image2d_t output_image,
+                       __private const int stride,
+                       __private const int offset,
+                       __private const int input_c,
+                       __private const int input_width,  /* of one block */
+                       __private const int input_height /* of one block */) {
+  const int out_c = get_global_id(0);
+  const int out_w = get_global_id(1);
+  const int out_nh = get_global_id(2);
+
+  // Integer texel coordinates are only defined for an unnormalized sampler.
+  const sampler_t sampler =
+      CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
+  const int2 in_pos_in_one_block =
+      (int2)(out_w, out_nh) * (int2)(stride, stride) + (int2)(offset, offset);
+
+  half4 output = read_imageh(bias, sampler, (int2)(out_c, 0));
+
+  // The bounds test uses the position inside one block: the absolute x also
+  // carries i * input_width and would reject every block but the first.
+  if (in_pos_in_one_block.x >= 0 && in_pos_in_one_block.y >= 0 &&
+      in_pos_in_one_block.x < input_width &&
+      in_pos_in_one_block.y < input_height) {
+    for (int i = 0; i < input_c; ++i) {
+      const int2 pos_in = (int2)(i * input_width + in_pos_in_one_block.x,
+                                 in_pos_in_one_block.y);
+      const half4 in = read_imageh(input, sampler, pos_in);
+
+      output.x +=
+          dot(in, read_imageh(filter, sampler, (int2)(i, out_c * 4 + 0)));
+      output.y +=
+          dot(in, read_imageh(filter, sampler, (int2)(i, out_c * 4 + 1)));
+      output.z +=
+          dot(in, read_imageh(filter, sampler, (int2)(i, out_c * 4 + 2)));
+      output.w +=
+          dot(in, read_imageh(filter, sampler, (int2)(i, out_c * 4 + 3)));
+    }
+  }
+
+#if defined(RELU)
+  output = activation(output);
+#endif
+
+  write_imageh(output_image,
+               (int2)(out_c * global_size_dim1 + out_w, out_nh), output);
+}
+
+// 3x3 dilated convolution on half-precision images; one work-item per texel.
+//
+// Tap j (0..8, row-major) lies at (j % 3 - 1, j / 3 - 1) * dilation from the
+// centre position. Taps outside one input block read as zero.
+//
+// Parameters match conv_1x1 except:
+//   filter    weights, read at (i * 3 + j % 3, row) with row computed below
+//   dilation  spacing, in texels, between neighbouring taps
+//
+// NOTE(review): the filter row adds lane * out_c * 3 to out_c * 12, so for
+// out_c == 0 all four lanes read the same rows; confirm this against the
+// host-side filter layout. activation() is also still commented out in
+// common.h, so a RELU build cannot compile yet.
+__kernel void conv_3x3(__private const int global_size_dim0,
+                       __private const int global_size_dim1,
+                       __private const int global_size_dim2,
+                       __read_only image2d_t input,
+                       __read_only image2d_t filter,
+                       __read_only image2d_t bias,
+                       __write_only image2d_t output_image,
+                       __private const int stride,
+                       __private const int offset,
+                       __private const int input_c,
+                       __private const int dilation,
+                       __private const int input_width,  /* of one block */
+                       __private const int input_height /* of one block */) {
+  const int out_c = get_global_id(0);
+  const int out_w = get_global_id(1);
+  const int out_nh = get_global_id(2);
+
+  // Integer texel coordinates are only defined for an unnormalized sampler.
+  const sampler_t sampler =
+      CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
+  const int2 in_pos_in_one_block =
+      (int2)(out_w, out_nh) * (int2)(stride, stride) + (int2)(offset, offset);
+
+  half4 output = read_imageh(bias, sampler, (int2)(out_c, 0));
+
+  for (int i = 0; i < input_c; ++i) {
+    for (int j = 0; j < 9; ++j) {
+      const int2 tap =
+          in_pos_in_one_block + (int2)(j % 3 - 1, j / 3 - 1) * dilation;
+      const bool outside = tap.x < 0 || tap.y < 0 ||
+                           tap.x >= input_width || tap.y >= input_height;
+      // ?: takes a scalar condition; select() would need a vector mask.
+      const half4 in = outside
+          ? (half4)(0.0f)
+          : read_imageh(input, sampler,
+                        (int2)(i * input_width + tap.x, tap.y));
+
+      const int wx = i * 3 + j % 3;
+      const int wy = out_c * 4 * 3 + j / 3;
+      output.x +=
+          dot(in, read_imageh(filter, sampler, (int2)(wx, wy + 0 * out_c * 3)));
+      output.y +=
+          dot(in, read_imageh(filter, sampler, (int2)(wx, wy + 1 * out_c * 3)));
+      output.z +=
+          dot(in, read_imageh(filter, sampler, (int2)(wx, wy + 2 * out_c * 3)));
+      output.w +=
+          dot(in, read_imageh(filter, sampler, (int2)(wx, wy + 3 * out_c * 3)));
+    }
+  }
+
+#if defined(RELU)
+  output = activation(output);
+#endif
+
+  write_imageh(output_image,
+               (int2)(out_c * global_size_dim1 + out_w, out_nh), output);
+}
+
+#endif  // draft kernels
diff --git a/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl b/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl
new file mode 100644
index 0000000000000000000000000000000000000000..78b5fee231efd9628a9ed095d386aa3b22d43c09
--- /dev/null
+++ b/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl
@@ -0,0 +1,164 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. 
*/
+
+#if 0  // Draft kernels: compiled out until they are finished and tested.
+
+#include "common.h"
+
+// 1x1 convolution on half-precision images; one work-item per output texel.
+//
+// The accumulator starts from the bias texel at (out_c, 0). For each of the
+// input_c channel blocks (laid side by side along x, input_width texels
+// each) the input texel is dotted with four filter texels, one per output
+// lane. A position that falls outside one block contributes nothing.
+//
+//   global_size_dim0/2  not read by the kernel body
+//   global_size_dim1    x pitch between channel blocks of output_image
+//   input               source image, read at (i * input_width + x, y)
+//   filter              weights, read at (i, out_c * 4 + lane)
+//   bias                initial accumulator, read at (out_c, 0)
+//   output_image        written at (out_c * global_size_dim1 + out_w, out_nh)
+//   stride, offset      input position = output position * stride + offset
+//   input_c             number of channel blocks accumulated
+//   input_width/height  extent of one channel block, used for bounds checks
+//
+// NOTE(review): activation() is still commented out in common.h, so a RELU
+// build of this file cannot compile until that helper is enabled.
+__kernel void conv_1x1(__private const int global_size_dim0,
+                       __private const int global_size_dim1,
+                       __private const int global_size_dim2,
+                       __read_only image2d_t input,
+                       __read_only image2d_t filter,
+                       __read_only image2d_t bias,
+                       __write_only image2d_t output_image,
+                       __private const int stride,
+                       __private const int offset,
+                       __private const int input_c,
+                       __private const int input_width,  /* of one block */
+                       __private const int input_height /* of one block */) {
+  const int out_c = get_global_id(0);
+  const int out_w = get_global_id(1);
+  const int out_nh = get_global_id(2);
+
+  // Integer texel coordinates are only defined for an unnormalized sampler.
+  const sampler_t sampler =
+      CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
+  const int2 in_pos_in_one_block =
+      (int2)(out_w, out_nh) * (int2)(stride, stride) + (int2)(offset, offset);
+
+  half4 output = read_imageh(bias, sampler, (int2)(out_c, 0));
+
+  // The bounds test uses the position inside one block: the absolute x also
+  // carries i * input_width and would reject every block but the first.
+  if (in_pos_in_one_block.x >= 0 && in_pos_in_one_block.y >= 0 &&
+      in_pos_in_one_block.x < input_width &&
+      in_pos_in_one_block.y < input_height) {
+    for (int i = 0; i < input_c; ++i) {
+      const int2 pos_in = (int2)(i * input_width + in_pos_in_one_block.x,
+                                 in_pos_in_one_block.y);
+      const half4 in = read_imageh(input, sampler, pos_in);
+
+      output.x +=
+          dot(in, read_imageh(filter, sampler, (int2)(i, out_c * 4 + 0)));
+      output.y +=
+          dot(in, read_imageh(filter, sampler, (int2)(i, out_c * 4 + 1)));
+      output.z +=
+          dot(in, read_imageh(filter, sampler, (int2)(i, out_c * 4 + 2)));
+      output.w +=
+          dot(in, read_imageh(filter, sampler, (int2)(i, out_c * 4 + 3)));
+    }
+  }
+
+#if defined(RELU)
+  output = activation(output);
+#endif
+
+  write_imageh(output_image,
+               (int2)(out_c * global_size_dim1 + out_w, out_nh), output);
+}
+
+// 3x3 dilated convolution on half-precision images; one work-item per texel.
+//
+// Tap j (0..8, row-major) lies at (j % 3 - 1, j / 3 - 1) * dilation from the
+// centre position. Taps outside one input block read as zero.
+//
+// Parameters match conv_1x1 except:
+//   filter    weights, read at (i * 3 + j % 3, row) with row computed below
+//   dilation  spacing, in texels, between neighbouring taps
+//
+// NOTE(review): the filter row adds lane * out_c * 3 to out_c * 12, so for
+// out_c == 0 all four lanes read the same rows; confirm this against the
+// host-side filter layout. activation() is also still commented out in
+// common.h, so a RELU build cannot compile yet.
+__kernel void conv_3x3(__private const int global_size_dim0,
+                       __private const int global_size_dim1,
+                       __private const int global_size_dim2,
+                       __read_only image2d_t input,
+                       __read_only image2d_t filter,
+                       __read_only image2d_t bias,
+                       __write_only image2d_t output_image,
+                       __private const int stride,
+                       __private const int offset,
+                       __private const int input_c,
+                       __private const int dilation,
+                       __private const int input_width,  /* of one block */
+                       __private const int input_height /* of one block */) {
+  const int out_c = get_global_id(0);
+  const int out_w = get_global_id(1);
+  const int out_nh = get_global_id(2);
+
+  // Integer texel coordinates are only defined for an unnormalized sampler.
+  const sampler_t sampler =
+      CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
+  const int2 in_pos_in_one_block =
+      (int2)(out_w, out_nh) * (int2)(stride, stride) + (int2)(offset, offset);
+
+  half4 output = read_imageh(bias, sampler, (int2)(out_c, 0));
+
+  for (int i = 0; i < input_c; ++i) {
+    for (int j = 0; j < 9; ++j) {
+      const int2 tap =
+          in_pos_in_one_block + (int2)(j % 3 - 1, j / 3 - 1) * dilation;
+      const bool outside = tap.x < 0 || tap.y < 0 ||
+                           tap.x >= input_width || tap.y >= input_height;
+      // ?: takes a scalar condition; select() would need a vector mask.
+      const half4 in = outside
+          ? (half4)(0.0f)
+          : read_imageh(input, sampler,
+                        (int2)(i * input_width + tap.x, tap.y));
+
+      const int wx = i * 3 + j % 3;
+      const int wy = out_c * 4 * 3 + j / 3;
+      output.x +=
+          dot(in, read_imageh(filter, sampler, (int2)(wx, wy + 0 * out_c * 3)));
+      output.y +=
+          dot(in, read_imageh(filter, sampler, (int2)(wx, wy + 1 * out_c * 3)));
+      output.z +=
+          dot(in, read_imageh(filter, sampler, (int2)(wx, wy + 2 * out_c * 3)));
+      output.w +=
+          dot(in, read_imageh(filter, sampler, (int2)(wx, wy + 3 * out_c * 3)));
+    }
+  }
+
+#if defined(RELU)
+  output = activation(output);
+#endif
+
+  write_imageh(output_image,
+               (int2)(out_c * global_size_dim1 + out_w, out_nh), output);
+}
+
+
+#endif  // draft kernels
+
diff --git a/src/operators/kernel/cl/cl_kernel/depthwise_conv_kernel.cl b/src/operators/kernel/cl/cl_kernel/depthwise_conv_kernel.cl
new file mode 100644
index 0000000000000000000000000000000000000000..1c524810a6fdc445787e1750510dec046cfb1373
--- /dev/null
+++ b/src/operators/kernel/cl/cl_kernel/depthwise_conv_kernel.cl
@@ -0,0 +1,111 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. 
*/
+
+#if 0  // Draft kernel: compiled out until it is finished and tested.
+
+// Depthwise 3x3 convolution on half-precision images; one work-item per
+// output texel.
+//
+// get_global_id(2) enumerates the output rows of all batches; it is split
+// into batch_index and the row inside that batch using output_height. The
+// nine taps lie at (j % 3 - 1, j / 3 - 1) from the centre position, and taps
+// outside one input block read as zero. Each lane of a tap is multiplied by
+// the matching lane of the filter texel at (out_c * 3 + j % 3, j / 3) and
+// added to the accumulator, which starts from the bias texel at (out_c, 0).
+//
+//   global_size_dim1    x pitch between channel blocks of output_image
+//   input               source image; block origin is
+//                       (out_c * input_width, batch_index * input_height)
+//   filter              weights, read at (out_c * 3 + j % 3, j / 3)
+//   bias                initial accumulator, read at (out_c, 0)
+//   output_image        written at (out_c * global_size_dim1 + out_w, out_nh)
+//   stride, offset      input position = output position * stride + offset
+//   input_width/height  extent of one input block, used for bounds checks
+//   output_height       rows per batch in the output
+//
+// NOTE(review): global_size_dim0, global_size_dim2, input_c, dilation and
+// output_width are not read by the kernel body (the taps are always one
+// texel apart), and no header declaring activation() is included here;
+// confirm both before enabling the kernel.
+__kernel void depth_conv_3x3(__private const int global_size_dim0,
+                             __private const int global_size_dim1,
+                             __private const int global_size_dim2,
+                             __read_only image2d_t input,
+                             __read_only image2d_t filter,
+                             __read_only image2d_t bias,
+                             __write_only image2d_t output_image,
+                             __private const int stride,
+                             __private const int offset,
+                             __private const int input_c,
+                             __private const int dilation,
+                             __private const int input_width,  /* of one block */
+                             __private const int input_height, /* of one block */
+                             __private const int output_width,
+                             __private const int output_height) {
+  const int out_c = get_global_id(0);
+  const int out_w = get_global_id(1);
+  const int out_nh = get_global_id(2);
+
+  // Integer texel coordinates are only defined for an unnormalized sampler.
+  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
+                            CLK_ADDRESS_CLAMP |
+                            CLK_FILTER_NEAREST;
+
+  const int batch_index = out_nh / output_height;
+  const int out_nh_in_one_batch = out_nh % output_height;
+
+  const int2 in_pos_in_one_block =
+      (int2)(out_w, out_nh_in_one_batch) * (int2)(stride, stride) +
+      (int2)(offset, offset);
+
+  // Origin of this channel block and batch inside the input image.
+  const int2 pos_in_input_block =
+      (int2)(out_c * input_width, batch_index * input_height);
+
+  const int weight_x_to = out_c * 3;
+
+  half4 output = read_imageh(bias, sampler, (int2)(out_c, 0));
+
+  for (int j = 0; j < 9; ++j) {
+    const int2 tap = in_pos_in_one_block + (int2)(j % 3 - 1, j / 3 - 1);
+    const bool outside = tap.x < 0 || tap.y < 0 ||
+                         tap.x >= input_width || tap.y >= input_height;
+
+    // ?: takes a scalar condition; select() would need a vector mask.
+    const half4 in = outside
+        ? (half4)(0.0f)
+        : read_imageh(input, sampler, pos_in_input_block + tap);
+    const half4 weight =
+        read_imageh(filter, sampler, (int2)(weight_x_to + j % 3, j / 3));
+
+    // Lane-wise multiply-accumulate: each lane only sees its own weight.
+    output.x += in.x * weight.x;
+    output.y += in.y * weight.y;
+    output.z += in.z * weight.z;
+    output.w += in.w * weight.w;
+  }
+
+#if defined(RELU)
+  output = activation(output);
+#endif
+
+  write_imageh(output_image,
+               (int2)(out_c * global_size_dim1 + out_w, out_nh), output);
+}
+
+
+
+
+
+#endif  // draft kernel
\ No newline at end of file
diff --git a/src/operators/kernel/cl/conv_add_bn_kernel.cpp b/src/operators/kernel/cl/conv_add_bn_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b5de59d61d7527337ae39ce9350b189db38928ff
--- /dev/null
+++ b/src/operators/kernel/cl/conv_add_bn_kernel.cpp
@@ -0,0 +1,38 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. 
*/
+
+#ifdef FUSION_CONVADDBNRELU_OP
+
+#include "operators/kernel/conv_add_bn_relu_kernel.h"
+#include "operators/kernel/central-arm-func/conv_add_bn_relu_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// NOTE(review): the specializations below carry no template argument lists;
+// confirm the intended arguments before building this file.
+// Placeholder: ignores its parameters and reports success without any setup.
+template <>
+bool ConvAddBNReluKernel::Init(FusionConvAddBNReluParam *param) { return true; }
+
+// Placeholder: performs no computation yet.
+template <>
+void ConvAddBNReluKernel::Compute(const FusionConvAddBNReluParam &param) {}
+
+template class ConvAddBNReluKernel;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/cl/conv_add_kernel.cpp b/src/operators/kernel/cl/conv_add_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6df860c3738aaddf45f79c74399d01f9f1647e9d
--- /dev/null
+++ b/src/operators/kernel/cl/conv_add_kernel.cpp
@@ -0,0 +1,38 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVADD_OP
+
+#include "operators/kernel/conv_add_kernel.h"
+#include "operators/kernel/central-arm-func/conv_add_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// NOTE(review): the specializations below carry no template argument lists;
+// confirm the intended arguments before building this file.
+// Placeholder: ignores its parameters and reports success without any setup.
+template <>
+bool ConvAddKernel::Init(FusionConvAddParam *param) { return true; }
+
+// Placeholder: performs no computation yet.
+template <>
+void ConvAddKernel::Compute(const FusionConvAddParam &param) {}
+
+template class ConvAddKernel;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif