From 84e1d25f38e2295faca4f6e47109c6a54896cf87 Mon Sep 17 00:00:00 2001 From: Jiaying Zhao Date: Wed, 18 Dec 2019 20:56:20 +0800 Subject: [PATCH] [MOBILE][OPENCL]Add elementwise_sub op; Fix instancenorm op; (#2622) --- mobile/src/common/types.cpp | 8 +- mobile/src/common/types.h | 2 + mobile/src/framework/load_ops.h | 8 +- mobile/src/operators/elementwise_sub_op.cpp | 3 + .../operators/fusion_instancenorm_relu_op.h | 4 +- mobile/src/operators/instancenorm_op.cpp | 2 +- .../cl/cl-kernel-func/instancenorm_func.cpp | 14 ++-- .../cl/cl-kernel-func/instancenorm_func.h | 3 +- .../cl/cl_kernel/elementwise_sub_kernel.cl | 27 +++++++ .../kernel/cl/elementwise_sub_kernel.cpp | 75 +++++++++++++++++ .../kernel/cl/instancenorm_kernel.cpp | 5 +- .../kernel/cl/instancenorm_relu_kernel.cpp | 6 +- .../kernel/instancenorm_relu_kernel.h | 6 +- mobile/src/operators/op_param.h | 80 +++++++++++++++++++ mobile/tools/op.cmake | 9 +++ 15 files changed, 230 insertions(+), 22 deletions(-) create mode 100644 mobile/src/operators/kernel/cl/cl_kernel/elementwise_sub_kernel.cl create mode 100644 mobile/src/operators/kernel/cl/elementwise_sub_kernel.cpp diff --git a/mobile/src/common/types.cpp b/mobile/src/common/types.cpp index 42a98450a3..00a4369010 100755 --- a/mobile/src/common/types.cpp +++ b/mobile/src/common/types.cpp @@ -134,6 +134,8 @@ const char *G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE = "fill_constant_batch_size_like"; const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU = "fusion_instancenorm_relu"; const char *G_OP_TYPE_PIXEL_SHUFFLE = "pixel_shuffle"; +const char *G_OP_TYPE_EXPAND = "expand"; +const char *G_OP_TYPE_GRID_SAMPLER = "grid_sampler"; std::unordered_map< std::string, std::pair, std::vector>> @@ -156,7 +158,7 @@ std::unordered_map< {G_OP_TYPE_ELEMENTWISE_MUL, {{"X", "Y"}, {"Out"}}}, {G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}}, {G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}}, - {G_OP_TYPE_INSTANCENORM, {{"X"}, {"Out"}}}, + {G_OP_TYPE_INSTANCENORM, {{"X"}, {"Y"}}}, {G_OP_TYPE_FUSION_INSTANCENORM_RELU, {{"X"}, {"Out"}}}, {G_OP_TYPE_LRN, {{"X"}, {"Out"}}}, {G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}}, @@ -258,5 +260,7 @@ std::unordered_map< {{"Ids", "Scores"}, {"SentenceIds", "SentenceScores"}}}, {G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE, {{"Input"}, {"Out"}}}, {G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}}, - {G_OP_TYPE_PIXEL_SHUFFLE, {{"X"}, {"Out"}}}}; + {G_OP_TYPE_PIXEL_SHUFFLE, {{"X"}, {"Out"}}}, + {G_OP_TYPE_EXPAND, {{"X"}, {"Out"}}}, + {G_OP_TYPE_GRID_SAMPLER, {{"X", "Grid"}, {"Output"}}}}; } // namespace paddle_mobile diff --git a/mobile/src/common/types.h b/mobile/src/common/types.h index d876f3b116..cc49182adb 100644 --- a/mobile/src/common/types.h +++ b/mobile/src/common/types.h @@ -265,6 +265,8 @@ extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN; extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU; extern const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU; extern const char *G_OP_TYPE_PIXEL_SHUFFLE; +extern const char *G_OP_TYPE_EXPAND; +extern const char *G_OP_TYPE_GRID_SAMPLER; extern std::unordered_map< std::string, std::pair, std::vector>> diff --git a/mobile/src/framework/load_ops.h b/mobile/src/framework/load_ops.h index b871d2af14..e04db5d1e8 100755 --- a/mobile/src/framework/load_ops.h +++ b/mobile/src/framework/load_ops.h @@ -246,7 +246,7 @@ LOAD_OP2(fusion_conv_bn, CPU, FPGA); LOAD_FUSION_MATCHER(fusion_conv_bn); #endif #ifdef ELEMENTWISESUB_OP -LOAD_OP1(elementwise_sub, CPU) +LOAD_OP2(elementwise_sub, CPU, GPU_CL) #endif #ifdef TOP_K_OP LOAD_OP1(top_k, CPU) @@ -380,3 +380,9 @@ LOAD_OP1(reduce_prod, CPU); #ifdef PIXEL_SHUFFLE_OP LOAD_OP1(pixel_shuffle, GPU_CL); #endif +#ifdef EXPAND_OP +LOAD_OP1(expand, GPU_CL); +#endif +#ifdef GRID_SAMPLER_OP +LOAD_OP1(grid_sampler, GPU_CL); +#endif diff --git a/mobile/src/operators/elementwise_sub_op.cpp b/mobile/src/operators/elementwise_sub_op.cpp index 9b9d89073a..6962e69a8d 100644 --- a/mobile/src/operators/elementwise_sub_op.cpp +++ b/mobile/src/operators/elementwise_sub_op.cpp @@ -32,6 +32,9 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(elementwise_sub, ops::ElementwiseSubOp); #endif +#ifdef PADDLE_MOBILE_CL +REGISTER_OPERATOR_CL(elementwise_sub, ops::ElementwiseSubOp); +#endif #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/mobile/src/operators/fusion_instancenorm_relu_op.h b/mobile/src/operators/fusion_instancenorm_relu_op.h index ce2623e4dd..91551e6558 100644 --- a/mobile/src/operators/fusion_instancenorm_relu_op.h +++ b/mobile/src/operators/fusion_instancenorm_relu_op.h @@ -45,7 +45,7 @@ class FusionInstanceNormReluMatcher : public framework::FusionOpMatcher { template class FusionInstanceNormReluOp : public framework::OperatorWithKernel< - DeviceType, InstanceNormParam, + DeviceType, FusionInstanceNormReluParam, operators::InstanceNormReluKernel> { public: FusionInstanceNormReluOp(const string &type, const VariableNameMap &inputs, @@ -53,7 +53,7 @@ class FusionInstanceNormReluOp const framework::AttributeMap &attrs, framework::Scope *scope) : framework::OperatorWithKernel< - DeviceType, InstanceNormParam, + DeviceType, FusionInstanceNormReluParam, operators::InstanceNormReluKernel>( type, inputs, outputs, attrs, scope) {} diff --git a/mobile/src/operators/instancenorm_op.cpp b/mobile/src/operators/instancenorm_op.cpp index 82cdf36f47..42af75ca21 100644 --- a/mobile/src/operators/instancenorm_op.cpp +++ b/mobile/src/operators/instancenorm_op.cpp @@ -24,7 +24,7 @@ namespace operators { template void InstanceNormOp::InferShape() const { auto x_dims = this->param_.InputX()->dims(); - this->param_.Out()->Resize(x_dims); + this->param_.OutputY()->Resize(x_dims); } } // namespace operators diff --git a/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.cpp b/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.cpp index 84c3230d82..eabbfe5be4 100644 --- a/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.cpp +++ b/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.cpp @@ -17,17 +17,17 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { void InstanceNorm(framework::CLHelper *cl_helper, - const InstanceNormParam ¶m) { + const framework::CLImage *input, framework::CLImage *output, + float epsilon) { auto kernel = cl_helper->KernelAt(0); - auto &dims = param.Out()->dims(); + auto &dims = output->dims(); const int n = dims[0]; const int c_group = (dims[1] + 3) / 4; const int h = dims[2]; const int w = dims[3]; - auto epsilon = param.Epsilon(); - auto input = param.InputX()->GetCLImage(); - auto out = param.Out()->GetCLImage(); + auto input_image = input->GetCLImage(); + auto out_image = output->GetCLImage(); // DLOG << "Epsilon: " << epsilon; @@ -66,9 +66,9 @@ void InstanceNorm(framework::CLHelper *cl_helper, CL_CHECK_ERRORS(status); clSetKernelArg(kernel, 5, sizeof(cl_float), &epsilon); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 6, sizeof(cl_mem), &input); + clSetKernelArg(kernel, 6, sizeof(cl_mem), &input_image); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 7, sizeof(cl_mem), &out); + clSetKernelArg(kernel, 7, sizeof(cl_mem), &out_image); CL_CHECK_ERRORS(status); clEnqueueNDRangeKernel(cl_helper->CLCommandQueue(), kernel, 3, NULL, work_size, local_work_size, 0, NULL, NULL); diff --git a/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.h b/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.h index 45c0bcd4e8..1e46ebf4ba 100644 --- a/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.h +++ b/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.h @@ -21,7 +21,8 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { void InstanceNorm(framework::CLHelper *cl_helper, - const InstanceNormParam ¶m); + const framework::CLImage *input, framework::CLImage *output, + float epsilon); } } // namespace paddle_mobile #endif diff --git a/mobile/src/operators/kernel/cl/cl_kernel/elementwise_sub_kernel.cl b/mobile/src/operators/kernel/cl/cl_kernel/elementwise_sub_kernel.cl new file mode 100644 index 0000000000..1f62ff377a --- /dev/null +++ b/mobile/src/operators/kernel/cl/cl_kernel/elementwise_sub_kernel.cl @@ -0,0 +1,27 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +__kernel void elementwise_sub(__global image2d_t inputImage, __global image2d_t bias, __write_only image2d_t outputImage) { + int x = get_global_id(0); + int y = get_global_id(1); + const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; + int2 coords; + coords.x = x; + coords.y = y; + half4 input = read_imageh(inputImage, sampler, coords); + half4 biase = read_imageh(bias, sampler, coords); + half4 output = input - biase; + write_imageh(outputImage, coords, output); + } diff --git a/mobile/src/operators/kernel/cl/elementwise_sub_kernel.cpp b/mobile/src/operators/kernel/cl/elementwise_sub_kernel.cpp new file mode 100644 index 0000000000..b107b3de3c --- /dev/null +++ b/mobile/src/operators/kernel/cl/elementwise_sub_kernel.cpp @@ -0,0 +1,75 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ELEMENTWISESUB_OP + +#include "operators/kernel/elementwise_sub_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ElementwiseSubKernel::Init( + ElementwiseSubParam *param) { + framework::CLImage *bias = reinterpret_cast( + const_cast(param->InputY())); + if (bias->dims().size() == 4) { + if (!bias->isInit()) { + bias->InitNormalCLImage(cl_helper_.CLContext(), + this->cl_helper_.CLCommandQueue()); + } + DLOG << " bias: " << *bias; + this->cl_helper_.AddKernel("elementwise_sub", "elementwise_sub_kernel.cl"); + } else { + DLOG << "error:bias dims not support"; + } + return true; +} + +template <> +void ElementwiseSubKernel::Compute( + const ElementwiseSubParam ¶m) { + auto input = param.InputX(); + auto bias = param.InputY(); + auto output = param.Out(); + cl_int status; + auto kernel = this->cl_helper_.KernelAt(0); + if (bias->dims().size() == 4) { + cl_mem input_image = input->GetCLImage(); + cl_mem bias_image = bias->GetCLImage(); + cl_mem output_image = output->GetCLImage(); + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_image); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bias_image); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_image); + CL_CHECK_ERRORS(status); + auto width = input->ImageWidth(); + auto height = input->ImageHeight(); + size_t global_work_size[2] = {width, height}; + status = + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, + NULL, global_work_size, NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); + } else { + DLOG << "error:bias dims not support"; + } +} + +template class ElementwiseSubKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp b/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp index f068d36133..439554ec10 100644 --- a/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp +++ b/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp @@ -23,7 +23,7 @@ namespace operators { template <> bool InstanceNormKernel::Init(InstanceNormParam *param) { - auto &dims = param->Out()->dims(); + auto &dims = param->OutputY()->dims(); const int h = dims[2]; std::string build_options = ""; if (h == 128) { @@ -41,7 +41,8 @@ bool InstanceNormKernel::Init(InstanceNormParam *param) { template <> void InstanceNormKernel::Compute( const InstanceNormParam ¶m) { - InstanceNorm(&this->cl_helper_, param); + InstanceNorm(&this->cl_helper_, param.InputX(), param.OutputY(), + param.Epsilon()); } template class InstanceNormKernel; diff --git a/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp b/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp index c265454d0e..270d77c4a0 100644 --- a/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp +++ b/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp @@ -23,7 +23,7 @@ namespace operators { template <> bool InstanceNormReluKernel::Init( - InstanceNormParam *param) { + FusionInstanceNormReluParam *param) { auto &dims = param->Out()->dims(); const int h = dims[2]; std::string build_options = "-DRELU"; @@ -41,8 +41,8 @@ bool InstanceNormReluKernel::Init( template <> void InstanceNormReluKernel::Compute( - const InstanceNormParam ¶m) { - InstanceNorm(&this->cl_helper_, param); + const FusionInstanceNormReluParam ¶m) { + InstanceNorm(&this->cl_helper_, param.InputX(), param.Out(), param.Epsilon()); } template class InstanceNormReluKernel; diff --git a/mobile/src/operators/kernel/instancenorm_relu_kernel.h b/mobile/src/operators/kernel/instancenorm_relu_kernel.h index 9a4bedb564..cb2a0e1f3c 100644 --- a/mobile/src/operators/kernel/instancenorm_relu_kernel.h +++ b/mobile/src/operators/kernel/instancenorm_relu_kernel.h @@ -30,10 +30,10 @@ using framework::OpKernelBase; template class InstanceNormReluKernel - : public OpKernelBase> { + : public OpKernelBase> { public: - void Compute(const InstanceNormParam ¶m); - bool Init(InstanceNormParam *param); + void Compute(const FusionInstanceNormReluParam ¶m); + bool Init(FusionInstanceNormReluParam *param); }; } // namespace operators diff --git a/mobile/src/operators/op_param.h b/mobile/src/operators/op_param.h index 2651a0f697..7fd4515f8f 100644 --- a/mobile/src/operators/op_param.h +++ b/mobile/src/operators/op_param.h @@ -927,6 +927,35 @@ class InstanceNormParam : public OpParam { Scope *scope) : OpParam(inputs, outputs, attrs, scope) { input_x_ = InputXFrom(inputs, *scope); + output_y_ = OutputYFrom(outputs, *scope); + epsilon_ = GetAttr("epsilon", attrs); + } + + const GType *InputX() const { return input_x_; } + + GType *OutputY() const { return output_y_; } + + const float &Epsilon() const { return epsilon_; } + + private: + GType *input_x_; + GType *output_y_; + float epsilon_; +}; +#endif + +#ifdef FUSION_INSTANCENORM_RELU_OP +template +class FusionInstanceNormReluParam : public OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; + + public: + FusionInstanceNormReluParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, + const AttributeMap &attrs, Scope *scope) + : OpParam(inputs, outputs, attrs, scope) { + input_x_ = InputXFrom(inputs, *scope); out_ = OutFrom(outputs, *scope); epsilon_ = GetAttr("epsilon", attrs); } @@ -3658,5 +3687,56 @@ class PixelShuffleParam : public OpParam { }; #endif +#ifdef EXPAND_OP +template +class ExpandParam : public OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; + + public: + ExpandParam(const VariableNameMap &inputs, const VariableNameMap &outputs, + const AttributeMap &attrs, Scope *scope) + : OpParam(inputs, outputs, attrs, scope) { + input_x_ = InputXFrom(inputs, *scope); + out_ = OutFrom(outputs, *scope); + expand_times_ = GetAttr>("expand_times", attrs); + } + + const GType *InputX() const { return input_x_; } + + GType *Out() const { return out_; } + + private: + GType *input_x_; + GType *out_; + std::vector expand_times_; +}; +#endif + +#ifdef GRID_SAMPLER_OP +template +class GridSamplerParam : public OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; + + public: + GridSamplerParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs, + Scope *scope) + : OpParam(inputs, outputs, attrs, scope) { + input_x_ = InputXFrom(inputs, *scope); + output_ = OutputFrom(outputs, *scope); + } + + const GType *InputX() const { return input_x_; } + + GType *Output() const { return output_; } + + private: + GType *input_x_; + GType *output_; +}; +#endif + } // namespace operators } // namespace paddle_mobile diff --git a/mobile/tools/op.cmake b/mobile/tools/op.cmake index 923380940a..100ae78de1 100755 --- a/mobile/tools/op.cmake +++ b/mobile/tools/op.cmake @@ -379,6 +379,8 @@ if(NOT FOUND_MATCH) set(REDUCE_PROD_OP ON) set(FUSION_INSTANCENORM_RELU_OP ON) set(PIXEL_SHUFFLE_OP ON) + set(EXPAND_OP ON) + set(GRID_SAMPLER_OP ON) endif() # option(BATCHNORM_OP "" ON) @@ -755,3 +757,10 @@ endif() if (PIXEL_SHUFFLE_OP) add_definitions(-DPIXEL_SHUFFLE_OP) endif() +if (EXPAND_OP) + add_definitions(-DEXPAND_OP) +endif() +if (GRID_SAMPLER_OP) + add_definitions(-DGRID_SAMPLER_OP) +endif() + -- GitLab