diff --git a/mobile/src/common/types.cpp b/mobile/src/common/types.cpp index c056a58130cc1f625108e655e2fd0f3eb0807563..42a98450a3220bfee9bea4811a9b153ce8ac5b2f 100755 --- a/mobile/src/common/types.cpp +++ b/mobile/src/common/types.cpp @@ -133,6 +133,7 @@ const char *G_OP_TYPE_BEAM_SEARCH_DECODE = "beam_search_decode"; const char *G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE = "fill_constant_batch_size_like"; const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU = "fusion_instancenorm_relu"; +const char *G_OP_TYPE_PIXEL_SHUFFLE = "pixel_shuffle"; std::unordered_map< std::string, std::pair, std::vector>> @@ -256,5 +257,6 @@ std::unordered_map< {G_OP_TYPE_BEAM_SEARCH_DECODE, {{"Ids", "Scores"}, {"SentenceIds", "SentenceScores"}}}, {G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE, {{"Input"}, {"Out"}}}, - {G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}}}; + {G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}}, + {G_OP_TYPE_PIXEL_SHUFFLE, {{"X"}, {"Out"}}}}; } // namespace paddle_mobile diff --git a/mobile/src/common/types.h b/mobile/src/common/types.h index 3b16e5ba74aa2bd743ccc7dfa942eb9542423c95..d876f3b116cbb397ffa8019b1a8d9a637606ec10 100644 --- a/mobile/src/common/types.h +++ b/mobile/src/common/types.h @@ -264,6 +264,7 @@ extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU; extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN; extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU; extern const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU; +extern const char *G_OP_TYPE_PIXEL_SHUFFLE; extern std::unordered_map< std::string, std::pair, std::vector>> diff --git a/mobile/src/framework/load_ops.h b/mobile/src/framework/load_ops.h index d1d5190c24759f76cf86bc407014d504e2c008c0..b871d2af140730850dfac0fd43383e48012c9ef0 100755 --- a/mobile/src/framework/load_ops.h +++ b/mobile/src/framework/load_ops.h @@ -377,3 +377,6 @@ LOAD_OP1(range, CPU); #ifdef REDUCE_PROD_OP LOAD_OP1(reduce_prod, CPU); #endif +#ifdef PIXEL_SHUFFLE_OP +LOAD_OP1(pixel_shuffle, GPU_CL); +#endif diff --git a/mobile/src/operators/kernel/cl/cl-kernel-func/conv_func.cpp b/mobile/src/operators/kernel/cl/cl-kernel-func/conv_func.cpp index 4da7b62b1bf9e647b69d4fa4007a4b59dc13a298..3489f44d91ed6c4ecb0af9837f4dfd4a4d8c0d6a 100644 --- a/mobile/src/operators/kernel/cl/cl-kernel-func/conv_func.cpp +++ b/mobile/src/operators/kernel/cl/cl-kernel-func/conv_func.cpp @@ -59,6 +59,7 @@ void ConvAddBnReluPt1x2(framework::CLHelper *cl_helper, int input_height = param.Input()->dims()[2]; int output_width = param.Output()->dims()[3]; int output_height = param.Output()->dims()[2]; + int output_c = param.Output()->dims()[1]; int filter_channel = param.Filter()->dims()[1]; int input_channel = param.Input()->dims()[1]; // @@ -216,6 +217,7 @@ void ConvAddBnRelu(framework::CLHelper *cl_helper, int input_height = param.Input()->dims()[2]; int output_width = param.Output()->dims()[3]; int output_height = param.Output()->dims()[2]; + int output_c = param.Output()->dims()[1]; int filter_channel = param.Filter()->dims()[1]; int input_channel = param.Input()->dims()[1]; @@ -397,21 +399,21 @@ void ConvAddBnRelu(framework::CLHelper *cl_helper, status = clSetKernelArg(kernel, index++, sizeof(int), &output_height); CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, index++, sizeof(int), &output_c); + CL_CHECK_ERRORS(status); + if (param.Filter()->dims()[2] == 3 && param.Filter()->dims()[3] == 3) { if (filter_channel != input_channel) { - if (filter_channel != 1) { - status = - clSetKernelArg(kernel, index++, sizeof(int), &filter_channel); - CL_CHECK_ERRORS(status); - int has_group = 1; - status = clSetKernelArg(kernel, index++, sizeof(int), &has_group); - CL_CHECK_ERRORS(status); - } + status = clSetKernelArg(kernel, index++, sizeof(int), &filter_channel); + CL_CHECK_ERRORS(status); + int group = input_channel / filter_channel; + status = clSetKernelArg(kernel, index++, sizeof(int), &group); + CL_CHECK_ERRORS(status); } else { status = clSetKernelArg(kernel, index++, sizeof(int), &filter_channel); CL_CHECK_ERRORS(status); - int has_group = 0; - status = clSetKernelArg(kernel, index++, sizeof(int), &has_group); + int group = 1; + status = clSetKernelArg(kernel, index++, sizeof(int), &group); CL_CHECK_ERRORS(status); } } diff --git a/mobile/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl b/mobile/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl index 831f0da3ff87f19c43e3d396201d99c1e8593664..15b13b1df1a00bbb796463c01517ca77c6bd5bbd 100755 --- a/mobile/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl +++ b/mobile/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl @@ -48,8 +48,9 @@ __kernel void conv_3x3(__private const int global_size_dim0, __private const int input_height,/* of one block */ __private const int output_width, __private const int output_height, + __private const int output_c, __private const int filter_channel, - __private const int has_group) { + __private const int group) { const int out_c = get_global_id(0); const int out_w = get_global_id(1); @@ -90,7 +91,7 @@ __kernel void conv_3x3(__private const int global_size_dim0, #endif half4 input[9]; - if (has_group == 0) { + if (group == 1) { for (int i = 0; i < input_c; ++i) { int2 pos_in = (int2)(i * input_width + in_pos_in_one_block.x, in_pos_in_one_block.y); input[0] = select(read_imageh(input_image, sampler, @@ -326,7 +327,7 @@ __kernel void conv_3x3(__private const int global_size_dim0, } } else { for (int i = 0; i < 4; i++) { - int used_input_channel_num = (out_c * 4 + i) * filter_channel; + int used_input_channel_num = (out_c * 4 + i) / (output_c / group) * filter_channel; for (int f_c = 0; f_c < filter_channel; ++f_c) { int input_c = used_input_channel_num + f_c; int input_block = input_c / 4; diff --git a/mobile/src/operators/kernel/cl/cl_kernel/pixel_shuffle_kernel.cl b/mobile/src/operators/kernel/cl/cl_kernel/pixel_shuffle_kernel.cl new file mode 100644 index 0000000000000000000000000000000000000000..a38c1ceae0a0dd502bd4c133c1ce229006e6eba3 --- /dev/null +++ b/mobile/src/operators/kernel/cl/cl_kernel/pixel_shuffle_kernel.cl @@ -0,0 +1,114 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +__kernel void pixel_shuffle(__read_only image2d_t input_image, + __write_only image2d_t output_image, + __private const int in_N, + __private const int in_C, + __private const int in_H, + __private const int in_W, + __private const int out_N, + __private const int out_C, + __private const int out_H, + __private const int out_W, + __private const int upscale_factor) { + + const int out_c4 = get_global_id(0); + const int out_w = get_global_id(1); + const int out_nh = get_global_id(2); + + int out_h = out_nh % out_H; + int out_n = out_nh / out_H; + + const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | + CLK_ADDRESS_CLAMP | + CLK_FILTER_NEAREST; + + int in_h = out_h / upscale_factor; + int in_w = out_w / upscale_factor; + int in_nh = out_n * in_H + in_h; + + half4 res; + int out_c; + int in_c; + half4 in; + int2 in_pos; + + out_c = out_c4 * 4 + 0; + in_c = out_c * upscale_factor * upscale_factor + (out_h % upscale_factor) * upscale_factor + (out_w % upscale_factor); + in_pos.x = (in_c / 4) * in_W + in_w; + in_pos.y = in_nh; + in = read_imageh(input_image, sampler, in_pos); + if (in_c % 4 == 0) { + res.x = in.x; + } else if (in_c % 4 == 1) { + res.x = in.y; + } else if (in_c % 4 == 2) { + res.x = in.z; + } else if (in_c % 4 == 3) { + res.x = in.w; + } + + out_c = out_c4 * 4 + 1; + in_c = out_c * upscale_factor * upscale_factor + (out_h % upscale_factor) * upscale_factor + (out_w % upscale_factor); + in_pos.x = (in_c / 4) * in_W + in_w; + in_pos.y = in_nh; + in = read_imageh(input_image, sampler, in_pos); + if (in_c % 4 == 0) { + res.y = in.x; + } else if (in_c % 4 == 1) { + res.y = in.y; + } else if (in_c % 4 == 2) { + res.y = in.z; + } else if (in_c % 4 == 3) { + res.y = in.w; + } + + out_c = out_c4 * 4 + 2; + in_c = out_c * upscale_factor * upscale_factor + (out_h % upscale_factor) * upscale_factor + (out_w % upscale_factor); + in_pos.x = (in_c / 4) * in_W + in_w; + in_pos.y = in_nh; + in = read_imageh(input_image, sampler, in_pos); + if (in_c % 4 == 0) { + res.z = in.x; + } else if (in_c % 4 == 1) { + res.z = in.y; + } else if (in_c % 4 == 2) { + res.z = in.z; + } else if (in_c % 4 == 3) { + res.z = in.w; + } + + out_c = out_c4 * 4 + 3; + in_c = out_c * upscale_factor * upscale_factor + (out_h % upscale_factor) * upscale_factor + (out_w % upscale_factor); + in_pos.x = (in_c / 4) * in_W + in_w; + in_pos.y = in_nh; + in = read_imageh(input_image, sampler, in_pos); + if (in_c % 4 == 0) { + res.w = in.x; + } else if (in_c % 4 == 1) { + res.w = in.y; + } else if (in_c % 4 == 2) { + res.w = in.z; + } else if (in_c % 4 == 3) { + res.w = in.w; + } + + int2 out_pos; + out_pos.x = out_c4 * out_W + out_w; + out_pos.y = out_nh; + write_imageh(output_image, out_pos, res); +} diff --git a/mobile/src/operators/kernel/cl/pixel_shuffle_kernel.cpp b/mobile/src/operators/kernel/cl/pixel_shuffle_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..faa90f9c4329d2450e15c220a68e3d675fb2eacc --- /dev/null +++ b/mobile/src/operators/kernel/cl/pixel_shuffle_kernel.cpp @@ -0,0 +1,80 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PIXEL_SHUFFLE_OP + +#include "operators/kernel/pixel_shuffle_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool PixelShuffleKernel::Init(PixelShuffleParam *param) { + this->cl_helper_.AddKernel("pixel_shuffle", "pixel_shuffle_kernel.cl"); + return true; +} + +template <> +void PixelShuffleKernel::Compute( + const PixelShuffleParam ¶m) { + auto kernel = this->cl_helper_.KernelAt(0); + auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Out()); + + auto input_image = param.InputX()->GetCLImage(); + auto output_image = param.Out()->GetCLImage(); + auto upscale_factor = param.upscale_factor(); + + int input_n = param.InputX()->dims()[0]; + int input_c = param.InputX()->dims()[1]; + int input_h = param.InputX()->dims()[2]; + int input_w = param.InputX()->dims()[3]; + int output_n = param.Out()->dims()[0]; + int output_c = param.Out()->dims()[1]; + int output_h = param.Out()->dims()[2]; + int output_w = param.Out()->dims()[3]; + + cl_int status; + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_image); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output_image); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 2, sizeof(int), &input_n); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 3, sizeof(int), &input_c); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 4, sizeof(int), &input_h); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 5, sizeof(int), &input_w); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 6, sizeof(int), &output_n); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 7, sizeof(int), &output_c); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 8, sizeof(int), &output_h); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 9, sizeof(int), &output_w); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 10, sizeof(int), &upscale_factor); + CL_CHECK_ERRORS(status); + + status = clEnqueueNDRangeKernel( + this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL, + default_work_size.data(), NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/mobile/src/operators/kernel/pixel_shuffle_kernel.h b/mobile/src/operators/kernel/pixel_shuffle_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..3f95c866f893f625194afe127dc83851dd874ff7 --- /dev/null +++ b/mobile/src/operators/kernel/pixel_shuffle_kernel.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifdef LRN_OP + +#include +#ifdef _OPENMP +#include +#endif +#ifdef __ARM_NEON +#include +#include "operators/math/math.h" +#endif +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template +class PixelShuffleKernel + : public framework::OpKernelBase> { + public: + void Compute(const PixelShuffleParam ¶m); + bool Init(PixelShuffleParam *param); +}; +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/mobile/src/operators/op_param.h b/mobile/src/operators/op_param.h index 2c695dfe053a3557052a093a34cd8aeb6da88381..2651a0f69766544a0ec09250248682c5b559ef01 100644 --- a/mobile/src/operators/op_param.h +++ b/mobile/src/operators/op_param.h @@ -3628,5 +3628,35 @@ class EXPParam : public OpParam { GType *out_; }; #endif + +#ifdef PIXEL_SHUFFLE_OP +template +class PixelShuffleParam : public OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; + + public: + PixelShuffleParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs, + Scope *scope) + : OpParam(inputs, outputs, attrs, scope) { + input_x_ = InputXFrom(inputs, *scope); + out_ = OutFrom(outputs, *scope); + upscale_factor_ = GetAttr("upscale_factor", attrs); + } + + const GType *InputX() const { return input_x_; } + + GType *Out() const { return out_; } + + const int &upscale_factor() const { return upscale_factor_; } + + private: + GType *input_x_; + GType *out_; + int upscale_factor_; +}; +#endif + } // namespace operators } // namespace paddle_mobile diff --git a/mobile/src/operators/pixel_shuffle_op.cpp b/mobile/src/operators/pixel_shuffle_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9105a72cfbddddbe39ecbbe2f35da204ba118f18 --- /dev/null +++ b/mobile/src/operators/pixel_shuffle_op.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PIXEL_SHUFFLE_OP + +#include "operators/pixel_shuffle_op.h" + +namespace paddle_mobile { +namespace operators { + +template +void PixelShuffleOp::InferShape() const { + auto x_dims = this->param_.InputX()->dims(); + int n = x_dims[0]; + int c = x_dims[1]; + int h = x_dims[2]; + int w = x_dims[3]; + int upscale_factor = this->param_.upscale_factor(); + this->param_.Out()->Resize( + framework::make_ddim({n, c / (upscale_factor * upscale_factor), + h * upscale_factor, w * upscale_factor})); +} + +} // namespace operators +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; +#ifdef PADDLE_MOBILE_CL +REGISTER_OPERATOR_CL(pixel_shuffle, ops::PixelShuffleOp); +#endif + +#endif diff --git a/mobile/src/operators/pixel_shuffle_op.h b/mobile/src/operators/pixel_shuffle_op.h new file mode 100644 index 0000000000000000000000000000000000000000..a1c6f8e1adb0c4f52e54974080aaa80e6ebe295f --- /dev/null +++ b/mobile/src/operators/pixel_shuffle_op.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PIXEL_SHUFFLE_OP + +#pragma once + +#include +#include "framework/operator.h" +#include "operators/kernel/pixel_shuffle_kernel.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { +using std::string; +template +class PixelShuffleOp : public framework::OperatorWithKernel< + DeviceType, PixelShuffleParam, + operators::PixelShuffleKernel> { + public: + PixelShuffleOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, framework::Scope *scope) + : framework::OperatorWithKernel< + DeviceType, PixelShuffleParam, + operators::PixelShuffleKernel>(type, inputs, outputs, + attrs, scope) {} + void InferShape() const override; + + protected: +}; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/mobile/tools/op.cmake b/mobile/tools/op.cmake index 9c718dac426509d322de5bc99ac4fc78d4aca5f7..923380940aa10147d65e374265c1073ec37cb11e 100755 --- a/mobile/tools/op.cmake +++ b/mobile/tools/op.cmake @@ -378,6 +378,7 @@ if(NOT FOUND_MATCH) set(RANGE_OP ON) set(REDUCE_PROD_OP ON) set(FUSION_INSTANCENORM_RELU_OP ON) + set(PIXEL_SHUFFLE_OP ON) endif() # option(BATCHNORM_OP "" ON) @@ -751,3 +752,6 @@ endif() if (REDUCE_PROD_OP) add_definitions(-DREDUCE_PROD_OP) endif() +if (PIXEL_SHUFFLE_OP) + add_definitions(-DPIXEL_SHUFFLE_OP) +endif()