diff --git a/mobile/src/common/types.cpp b/mobile/src/common/types.cpp
index 42a98450a3220bfee9bea4811a9b153ce8ac5b2f..00a4369010248586c9957e9a5d97e22a6d9ab9eb 100755
--- a/mobile/src/common/types.cpp
+++ b/mobile/src/common/types.cpp
@@ -134,6 +134,8 @@ const char *G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE =
     "fill_constant_batch_size_like";
 const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU = "fusion_instancenorm_relu";
 const char *G_OP_TYPE_PIXEL_SHUFFLE = "pixel_shuffle";
+const char *G_OP_TYPE_EXPAND = "expand";
+const char *G_OP_TYPE_GRID_SAMPLER = "grid_sampler";
 
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -156,7 +158,7 @@ std::unordered_map<
         {G_OP_TYPE_ELEMENTWISE_MUL, {{"X", "Y"}, {"Out"}}},
         {G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}},
         {G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}},
-        {G_OP_TYPE_INSTANCENORM, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_INSTANCENORM, {{"X"}, {"Y"}}},
         {G_OP_TYPE_FUSION_INSTANCENORM_RELU, {{"X"}, {"Out"}}},
         {G_OP_TYPE_LRN, {{"X"}, {"Out"}}},
         {G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}},
@@ -258,5 +260,7 @@ std::unordered_map<
          {{"Ids", "Scores"}, {"SentenceIds", "SentenceScores"}}},
         {G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}},
-        {G_OP_TYPE_PIXEL_SHUFFLE, {{"X"}, {"Out"}}}};
+        {G_OP_TYPE_PIXEL_SHUFFLE, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_EXPAND, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_GRID_SAMPLER, {{"X", "Grid"}, {"Output"}}}};
 }  // namespace paddle_mobile
diff --git a/mobile/src/common/types.h b/mobile/src/common/types.h
index d876f3b116cbb397ffa8019b1a8d9a637606ec10..cc49182adb75be6d81d403971d53dca6f0b46627 100644
--- a/mobile/src/common/types.h
+++ b/mobile/src/common/types.h
@@ -265,6 +265,8 @@ extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN;
 extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU;
 extern const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU;
 extern const char *G_OP_TYPE_PIXEL_SHUFFLE;
+extern const char *G_OP_TYPE_EXPAND;
+extern const char *G_OP_TYPE_GRID_SAMPLER;
 
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
diff --git a/mobile/src/framework/load_ops.h b/mobile/src/framework/load_ops.h
index b871d2af140730850dfac0fd43383e48012c9ef0..e04db5d1e8d6e2a75343cbee15269d607f71b7c9 100755
--- a/mobile/src/framework/load_ops.h
+++ b/mobile/src/framework/load_ops.h
@@ -246,7 +246,7 @@ LOAD_OP2(fusion_conv_bn, CPU, FPGA);
 LOAD_FUSION_MATCHER(fusion_conv_bn);
 #endif
 #ifdef ELEMENTWISESUB_OP
-LOAD_OP1(elementwise_sub, CPU)
+LOAD_OP2(elementwise_sub, CPU, GPU_CL)
 #endif
 #ifdef TOP_K_OP
 LOAD_OP1(top_k, CPU)
@@ -380,3 +380,9 @@ LOAD_OP1(reduce_prod, CPU);
 #ifdef PIXEL_SHUFFLE_OP
 LOAD_OP1(pixel_shuffle, GPU_CL);
 #endif
+#ifdef EXPAND_OP
+LOAD_OP1(expand, GPU_CL);
+#endif
+#ifdef GRID_SAMPLER_OP
+LOAD_OP1(grid_sampler, GPU_CL);
+#endif
diff --git a/mobile/src/operators/elementwise_sub_op.cpp b/mobile/src/operators/elementwise_sub_op.cpp
index 9b9d89073a637fb769687684ead23829e5445c90..6962e69a8de5522aeff912fe84484e36879300d4 100644
--- a/mobile/src/operators/elementwise_sub_op.cpp
+++ b/mobile/src/operators/elementwise_sub_op.cpp
@@ -32,6 +32,9 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(elementwise_sub, ops::ElementwiseSubOp);
 #endif
+#ifdef PADDLE_MOBILE_CL
+REGISTER_OPERATOR_CL(elementwise_sub, ops::ElementwiseSubOp);
+#endif
 #ifdef PADDLE_MOBILE_FPGA
 #endif
 
diff --git a/mobile/src/operators/fusion_instancenorm_relu_op.h b/mobile/src/operators/fusion_instancenorm_relu_op.h
index ce2623e4dda46a0952fede3e1a25012ed5da4394..91551e65586b822d75336450b4cd0db2a7dd7d26 100644
--- a/mobile/src/operators/fusion_instancenorm_relu_op.h
+++ b/mobile/src/operators/fusion_instancenorm_relu_op.h
@@ -45,7 +45,7 @@ class FusionInstanceNormReluMatcher : public framework::FusionOpMatcher {
 template <typename DeviceType, typename T>
 class FusionInstanceNormReluOp
     : public framework::OperatorWithKernel<
-          DeviceType, InstanceNormParam<DeviceType>,
+          DeviceType, FusionInstanceNormReluParam<DeviceType>,
           operators::InstanceNormReluKernel<DeviceType, T>> {
  public:
  FusionInstanceNormReluOp(const string &type, const VariableNameMap &inputs,
@@ -53,7 +53,7 @@ class FusionInstanceNormReluOp
                           const framework::AttributeMap &attrs,
                           framework::Scope *scope)
       : framework::OperatorWithKernel<
-            DeviceType, InstanceNormParam<DeviceType>,
+            DeviceType, FusionInstanceNormReluParam<DeviceType>,
             operators::InstanceNormReluKernel<DeviceType, T>>(
             type, inputs, outputs, attrs, scope) {}
 
diff --git a/mobile/src/operators/instancenorm_op.cpp b/mobile/src/operators/instancenorm_op.cpp
index 82cdf36f47414771eb6829751e04bd559c6ff29e..42af75ca21ba4a70a78c50fa34ab674278bea743 100644
--- a/mobile/src/operators/instancenorm_op.cpp
+++ b/mobile/src/operators/instancenorm_op.cpp
@@ -24,7 +24,7 @@ namespace operators {
 template <typename Dtype, typename T>
 void InstanceNormOp<Dtype, T>::InferShape() const {
   auto x_dims = this->param_.InputX()->dims();
-  this->param_.Out()->Resize(x_dims);
+  this->param_.OutputY()->Resize(x_dims);
 }
 
 }  // namespace operators
diff --git a/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.cpp b/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.cpp
index 84c3230d82bd2bfb54210e3e57ecf95bb43b7ff9..eabbfe5be4f67345a0919665def8509d640ed386 100644
--- a/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.cpp
+++ b/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.cpp
@@ -17,17 +17,17 @@ limitations under the License. */
 
 namespace paddle_mobile {
 namespace operators {
 void InstanceNorm(framework::CLHelper *cl_helper,
-                  const InstanceNormParam<GPU_CL> &param) {
+                  const framework::CLImage *input, framework::CLImage *output,
+                  float epsilon) {
   auto kernel = cl_helper->KernelAt(0);
-  auto &dims = param.Out()->dims();
+  auto &dims = output->dims();
   const int n = dims[0];
   const int c_group = (dims[1] + 3) / 4;
   const int h = dims[2];
   const int w = dims[3];
-  auto epsilon = param.Epsilon();
-  auto input = param.InputX()->GetCLImage();
-  auto out = param.Out()->GetCLImage();
+  auto input_image = input->GetCLImage();
+  auto out_image = output->GetCLImage();
 
   //  DLOG << "Epsilon: " << epsilon;
 
@@ -66,9 +66,9 @@ void InstanceNorm(framework::CLHelper *cl_helper,
   CL_CHECK_ERRORS(status);
   clSetKernelArg(kernel, 5, sizeof(cl_float), &epsilon);
   CL_CHECK_ERRORS(status);
-  clSetKernelArg(kernel, 6, sizeof(cl_mem), &input);
+  clSetKernelArg(kernel, 6, sizeof(cl_mem), &input_image);
   CL_CHECK_ERRORS(status);
-  clSetKernelArg(kernel, 7, sizeof(cl_mem), &out);
+  clSetKernelArg(kernel, 7, sizeof(cl_mem), &out_image);
   CL_CHECK_ERRORS(status);
   clEnqueueNDRangeKernel(cl_helper->CLCommandQueue(), kernel, 3, NULL,
                          work_size, local_work_size, 0, NULL, NULL);
diff --git a/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.h b/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.h
index 45c0bcd4e8e8ea0d6c24904b4fa7fc763d3e9bc1..1e46ebf4ba497b44699a33adf27dd21830e1e3a4 100644
--- a/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.h
+++ b/mobile/src/operators/kernel/cl/cl-kernel-func/instancenorm_func.h
@@ -21,7 +21,8 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 void InstanceNorm(framework::CLHelper *cl_helper,
-                  const InstanceNormParam<GPU_CL> &param);
+                  const framework::CLImage *input, framework::CLImage *output,
+                  float epsilon);
 }
 }  // namespace paddle_mobile
 #endif
diff --git a/mobile/src/operators/kernel/cl/cl_kernel/elementwise_sub_kernel.cl b/mobile/src/operators/kernel/cl/cl_kernel/elementwise_sub_kernel.cl
new file mode 100644
index 0000000000000000000000000000000000000000..1f62ff377a7f8fddaeae108a8cfaa6d98847f9af
--- /dev/null
+++ b/mobile/src/operators/kernel/cl/cl_kernel/elementwise_sub_kernel.cl
@@ -0,0 +1,27 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+__kernel void elementwise_sub(__global image2d_t inputImage, __global image2d_t bias, __write_only image2d_t outputImage) {
+  int x = get_global_id(0);
+  int y = get_global_id(1);
+  const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
+  int2 coords;
+  coords.x = x;
+  coords.y = y;
+  half4 input = read_imageh(inputImage, sampler, coords);
+  half4 biase = read_imageh(bias, sampler, coords);
+  half4 output = input - biase;
+  write_imageh(outputImage, coords, output);
+ }
diff --git a/mobile/src/operators/kernel/cl/elementwise_sub_kernel.cpp b/mobile/src/operators/kernel/cl/elementwise_sub_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b107b3de3c1df163e9f987c9a8cdff23b6a71c43
--- /dev/null
+++ b/mobile/src/operators/kernel/cl/elementwise_sub_kernel.cpp
@@ -0,0 +1,75 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef ELEMENTWISESUB_OP
+
+#include "operators/kernel/elementwise_sub_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool ElementwiseSubKernel<GPU_CL, float>::Init(
+    ElementwiseSubParam<GPU_CL> *param) {
+  framework::CLImage *bias = reinterpret_cast<framework::CLImage *>(
+      const_cast<framework::CLImage *>(param->InputY()));
+  if (bias->dims().size() == 4) {
+    if (!bias->isInit()) {
+      bias->InitNormalCLImage(cl_helper_.CLContext(),
+                              this->cl_helper_.CLCommandQueue());
+    }
+    DLOG << " bias: " << *bias;
+    this->cl_helper_.AddKernel("elementwise_sub", "elementwise_sub_kernel.cl");
+  } else {
+    DLOG << "error:bias dims not support";
+  }
+  return true;
+}
+
+template <>
+void ElementwiseSubKernel<GPU_CL, float>::Compute(
+    const ElementwiseSubParam<GPU_CL> &param) {
+  auto input = param.InputX();
+  auto bias = param.InputY();
+  auto output = param.Out();
+  cl_int status;
+  auto kernel = this->cl_helper_.KernelAt(0);
+  if (bias->dims().size() == 4) {
+    cl_mem input_image = input->GetCLImage();
+    cl_mem bias_image = bias->GetCLImage();
+    cl_mem output_image = output->GetCLImage();
+    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_image);
+    CL_CHECK_ERRORS(status);
+    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bias_image);
+    CL_CHECK_ERRORS(status);
+    status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_image);
+    CL_CHECK_ERRORS(status);
+    auto width = input->ImageWidth();
+    auto height = input->ImageHeight();
+    size_t global_work_size[2] = {width, height};
+    status =
+        clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
+                               NULL, global_work_size, NULL, 0, NULL, NULL);
+    CL_CHECK_ERRORS(status);
+  } else {
+    DLOG << "error:bias dims not support";
+  }
+}
+
+template class ElementwiseSubKernel<GPU_CL, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp b/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp
index f068d36133e826e8caa79d8f4852bbaac4415cdd..439554ec10696913b42923177828870790f0f711 100644
--- a/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp
+++ b/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp
@@ -23,7 +23,7 @@ namespace operators {
 
 template <>
 bool InstanceNormKernel<GPU_CL, float>::Init(InstanceNormParam<GPU_CL> *param) {
-  auto &dims = param->Out()->dims();
+  auto &dims = param->OutputY()->dims();
   const int h = dims[2];
   std::string build_options = "";
   if (h == 128) {
@@ -41,7 +41,8 @@ bool InstanceNormKernel<GPU_CL, float>::Init(InstanceNormParam<GPU_CL> *param) {
 template <>
 void InstanceNormKernel<GPU_CL, float>::Compute(
     const InstanceNormParam<GPU_CL> &param) {
-  InstanceNorm(&this->cl_helper_, param);
+  InstanceNorm(&this->cl_helper_, param.InputX(), param.OutputY(),
+               param.Epsilon());
 }
 
 template class InstanceNormKernel<GPU_CL, float>;
diff --git a/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp b/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp
index c265454d0ea67c7a6aec8f1017bc5455d328a756..270d77c4a051df227719338f6793e64aa2920f9f 100644
--- a/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp
+++ b/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp
@@ -23,7 +23,7 @@ namespace operators {
 
 template <>
 bool InstanceNormReluKernel<GPU_CL, float>::Init(
-    InstanceNormParam<GPU_CL> *param) {
+    FusionInstanceNormReluParam<GPU_CL> *param) {
   auto &dims = param->Out()->dims();
   const int h = dims[2];
   std::string build_options = "-DRELU";
@@ -41,8 +41,8 @@ bool InstanceNormReluKernel<GPU_CL, float>::Init(
 
 template <>
 void InstanceNormReluKernel<GPU_CL, float>::Compute(
-    const InstanceNormParam<GPU_CL> &param) {
-  InstanceNorm(&this->cl_helper_, param);
+    const FusionInstanceNormReluParam<GPU_CL> &param) {
+  InstanceNorm(&this->cl_helper_, param.InputX(), param.Out(), param.Epsilon());
 }
 
 template class InstanceNormReluKernel<GPU_CL, float>;
diff --git a/mobile/src/operators/kernel/instancenorm_relu_kernel.h b/mobile/src/operators/kernel/instancenorm_relu_kernel.h
index 9a4bedb564ea68e252f65372c38f3cfce13f339f..cb2a0e1f3cb739847cdf4f635de74c223896106b 100644
--- a/mobile/src/operators/kernel/instancenorm_relu_kernel.h
+++ b/mobile/src/operators/kernel/instancenorm_relu_kernel.h
@@ -30,10 +30,10 @@ using framework::OpKernelBase;
 
 template <typename DeviceType, typename T>
 class InstanceNormReluKernel
-    : public OpKernelBase<DeviceType, InstanceNormParam<DeviceType>> {
+    : public OpKernelBase<DeviceType, FusionInstanceNormReluParam<DeviceType>> {
  public:
-  void Compute(const InstanceNormParam<DeviceType> &param);
-  bool Init(InstanceNormParam<DeviceType> *param);
+  void Compute(const FusionInstanceNormReluParam<DeviceType> &param);
+  bool Init(FusionInstanceNormReluParam<DeviceType> *param);
 };
 
 }  // namespace operators
diff --git a/mobile/src/operators/op_param.h b/mobile/src/operators/op_param.h
index 2651a0f69766544a0ec09250248682c5b559ef01..7fd4515f8f787e14f6c081b2a2607e1c80e2843c 100644
--- a/mobile/src/operators/op_param.h
+++ b/mobile/src/operators/op_param.h
@@ -927,6 +927,35 @@ class InstanceNormParam : public OpParam {
                     Scope *scope)
       : OpParam(inputs, outputs, attrs, scope) {
     input_x_ = InputXFrom<GType>(inputs, *scope);
+    output_y_ = OutputYFrom<GType>(outputs, *scope);
+    epsilon_ = GetAttr<float>("epsilon", attrs);
+  }
+
+  const GType *InputX() const { return input_x_; }
+
+  GType *OutputY() const { return output_y_; }
+
+  const float &Epsilon() const { return epsilon_; }
+
+ private:
+  GType *input_x_;
+  GType *output_y_;
+  float epsilon_;
+};
+#endif
+
+#ifdef FUSION_INSTANCENORM_RELU_OP
+template <typename Dtype>
+class FusionInstanceNormReluParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
+ public:
+  FusionInstanceNormReluParam(const VariableNameMap &inputs,
+                              const VariableNameMap &outputs,
+                              const AttributeMap &attrs, Scope *scope)
+      : OpParam(inputs, outputs, attrs, scope) {
+    input_x_ = InputXFrom<GType>(inputs, *scope);
     out_ = OutFrom<GType>(outputs, *scope);
     epsilon_ = GetAttr<float>("epsilon", attrs);
   }
@@ -3658,5 +3687,56 @@ class PixelShuffleParam : public OpParam {
 };
 #endif
 
+#ifdef EXPAND_OP
+template <typename Dtype>
+class ExpandParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
+ public:
+  ExpandParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+              const AttributeMap &attrs, Scope *scope)
+      : OpParam(inputs, outputs, attrs, scope) {
+    input_x_ = InputXFrom<GType>(inputs, *scope);
+    out_ = OutFrom<GType>(outputs, *scope);
+    expand_times_ = GetAttr<std::vector<int>>("expand_times", attrs);
+  }
+
+  const GType *InputX() const { return input_x_; }
+
+  GType *Out() const { return out_; }
+
+ private:
+  GType *input_x_;
+  GType *out_;
+  std::vector<int> expand_times_;
+};
+#endif
+
+#ifdef GRID_SAMPLER_OP
+template <typename Dtype>
+class GridSamplerParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
+ public:
+  GridSamplerParam(const VariableNameMap &inputs,
+                   const VariableNameMap &outputs, const AttributeMap &attrs,
+                   Scope *scope)
+      : OpParam(inputs, outputs, attrs, scope) {
+    input_x_ = InputXFrom<GType>(inputs, *scope);
+    output_ = OutputFrom<GType>(outputs, *scope);
+  }
+
+  const GType *InputX() const { return input_x_; }
+
+  GType *Output() const { return output_; }
+
+ private:
+  GType *input_x_;
+  GType *output_;
+};
+#endif
+
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/mobile/tools/op.cmake b/mobile/tools/op.cmake
index 923380940aa10147d65e374265c1073ec37cb11e..100ae78de1688638f8e44b03f8f8369b7fe45f4d 100755
--- a/mobile/tools/op.cmake
+++ b/mobile/tools/op.cmake
@@ -379,6 +379,8 @@ if(NOT FOUND_MATCH)
     set(REDUCE_PROD_OP ON)
     set(FUSION_INSTANCENORM_RELU_OP ON)
     set(PIXEL_SHUFFLE_OP ON)
+    set(EXPAND_OP ON)
+    set(GRID_SAMPLER_OP ON)
 endif()
 
 # option(BATCHNORM_OP "" ON)
@@ -755,3 +757,10 @@ endif()
 if (PIXEL_SHUFFLE_OP)
   add_definitions(-DPIXEL_SHUFFLE_OP)
 endif()
+if (EXPAND_OP)
+  add_definitions(-DEXPAND_OP)
+endif()
+if (GRID_SAMPLER_OP)
+  add_definitions(-DGRID_SAMPLER_OP)
+endif()
+