diff --git a/src/operators/kernel/arm/convolution/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/arm/convolution/conv_add_bn_relu_kernel.cpp
index 1c2df34184b36889f63849a8584c3a6b7dd9760c..229b96b550e8f8c1f693768898ac879e486b56a8 100644
--- a/src/operators/kernel/arm/convolution/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/arm/convolution/conv_add_bn_relu_kernel.cpp
@@ -16,9 +16,12 @@ limitations under the License. */
 
 #include "operators/kernel/conv_add_bn_relu_kernel.h"
 #include <cmath>
+#include "framework/context.h"
 #include "operators/kernel/arm/convolution/conv_common.h"
 #include "operators/kernel/central-arm-func/conv_arm_func.h"
 #include "operators/math/element_wise.h"
+#include "operators/math/gemm/gemm1x1s1.h"
+#include "operators/math/slidingwindow_utils.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -64,6 +67,13 @@ bool ConvAddBNReluKernel<CPU, float>::Init(
 
   // try to use faster depthwise conv
   switch (param->ExecMode()) {
+    case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
+    case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S2_FLOAT:
+      use_slidingwindow_add_bn_relu = true;
+      break;
+    case ConvParam<CPU>::EXEC_GEMM1x1s1_FLOAT:
+      use_gemm_add_bn_relu = true;
+      break;
     case ConvParam<CPU>::EXEC_DEPTHWISE3x3S1_FLOAT:
     case ConvParam<CPU>::EXEC_DEPTHWISE3x3S2_FLOAT:
      const std::vector<int> &paddings = param->Paddings();
@@ -84,7 +94,8 @@ bool ConvAddBNReluKernel<CPU, float>::Init(
       break;
   }
 
-  if (could_use_faster_depthwise_conv_) {
+  if (could_use_faster_depthwise_conv_ || use_gemm_add_bn_relu ||
+      use_slidingwindow_add_bn_relu) {
     auto filter_data = param->Filter()->data<float>();
     auto filter_dim = param->Filter()->dims();
     int len = 1;
@@ -99,6 +110,16 @@ bool ConvAddBNReluKernel<CPU, float>::Init(
           filter_data[i * step + k] * new_scale_ptr[i];
       }
     }
+    if (use_gemm_add_bn_relu) {
+      ARMArch arch = framework::CPUContext::Context()->get_arch();
+      math::gemm1x1s1_transform_weight(*param->Filter(), *param->Output(),
+                                       param->transformed_filter_,
+                                       param->groups, arch);
+    }
+    if (use_slidingwindow_add_bn_relu) {
+      math::slidingwindow_transform_weight<float>(*param->Filter(),
+                                                  param->transformed_filter_);
+    }
   }
 
   return true;
@@ -129,11 +150,15 @@ void ConvAddBNReluKernel<CPU, float>::Compute(
       GemmConv<float, float>(param);
       break;
     case ConvParam<CPU>::EXEC_GEMM1x1s1_FLOAT:
-      GemmConv1x1s1<float, float>(param);
+      fusion_has_been_computed = true;
+      GemmConv1x1s1<float, float>(param, param.NewBias()->data<float>(), true,
+                                  true);
       break;
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S2_FLOAT:
-      SlidingwindowConv3x3<float, float>(param);
+      SlidingwindowConv3x3<float, float>(param, param.NewBias()->data<float>(),
+                                         true, true);
+      fusion_has_been_computed = true;
       break;
     default:
       PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
diff --git a/src/operators/kernel/arm/convolution/conv_add_kernel.cpp b/src/operators/kernel/arm/convolution/conv_add_kernel.cpp
index 20474a904f554a2b138053835992918b3abe914b..66ed513ac97032fbdc47edd724950f0dd14c11ec 100644
--- a/src/operators/kernel/arm/convolution/conv_add_kernel.cpp
+++ b/src/operators/kernel/arm/convolution/conv_add_kernel.cpp
@@ -30,6 +30,7 @@ bool ConvAddKernel<CPU, float>::Init(FusionConvAddParam<CPU> *param) {
 
 template <>
 void ConvAddKernel<CPU, float>::Compute(const FusionConvAddParam<CPU> &param) {
+  bool fusion_has_been_computed = false;
   switch (param.ExecMode()) {
     case ConvParam<CPU>::EXEC_DEPTHWISE3x3S1_FLOAT:
     case ConvParam<CPU>::EXEC_DEPTHWISE3x3S2_FLOAT:
@@ -45,22 +46,28 @@ void ConvAddKernel<CPU, float>::Compute(const FusionConvAddParam<CPU> &param) {
       GemmConv<float, float>(param);
       break;
     case ConvParam<CPU>::EXEC_GEMM1x1s1_FLOAT:
-      GemmConv1x1s1<float, float>(param);
+      fusion_has_been_computed = true;
+      GemmConv1x1s1<float, float>(param, param.Bias()->data<float>(), true,
+                                  false);
       break;
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S2_FLOAT:
-      SlidingwindowConv3x3<float, float>(param);
+      SlidingwindowConv3x3<float, float>(param, param.Bias()->data<float>(),
+                                         true, false);
+      fusion_has_been_computed = true;
       break;
     default:
       PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
                                     param.ExecMode());
   }
 
-  if (param.Bias()->dims() == param.Output()->dims()) {
-    math::AddElememtWise<IDENTITY>(param.Output(), param.Bias(), param.Axis(),
-                                   param.Output());
-  } else {
-    math::AddChannelWise<IDENTITY>(param.Output(), param.Bias(),
-                                   param.Output());
+  if (!fusion_has_been_computed) {
+    if (param.Bias()->dims() == param.Output()->dims()) {
+      math::AddElememtWise<IDENTITY>(param.Output(), param.Bias(), param.Axis(),
+                                     param.Output());
+    } else {
+      math::AddChannelWise<IDENTITY>(param.Output(), param.Bias(),
+                                     param.Output());
+    }
   }
 }
diff --git a/src/operators/kernel/arm/convolution/conv_add_relu_kernel.cpp b/src/operators/kernel/arm/convolution/conv_add_relu_kernel.cpp
index bfdd58e944d28aec6292cd20cf11891fc9449a15..54eb2ca23b46347bcdda45feda6f9bfda12b3745 100644
--- a/src/operators/kernel/arm/convolution/conv_add_relu_kernel.cpp
+++ b/src/operators/kernel/arm/convolution/conv_add_relu_kernel.cpp
@@ -31,6 +31,7 @@ bool ConvAddReluKernel<CPU, float>::Init(FusionConvAddReluParam<CPU> *param) {
 template <>
 void ConvAddReluKernel<CPU, float>::Compute(
     const FusionConvAddReluParam<CPU> &param) {
+  bool fusion_has_been_computed = false;
   switch (param.ExecMode()) {
     case ConvParam<CPU>::EXEC_DEPTHWISE3x3S1_FLOAT:
     case ConvParam<CPU>::EXEC_DEPTHWISE3x3S2_FLOAT:
@@ -46,21 +47,25 @@ void ConvAddReluKernel<CPU, float>::Compute(
       GemmConv<float, float>(param);
       break;
     case ConvParam<CPU>::EXEC_GEMM1x1s1_FLOAT:
-      GemmConv1x1s1<float, float>(param);
+      fusion_has_been_computed = true;
+      GemmConv1x1s1<float, float>(param, param.Bias()->data<float>(), true,
+                                  true);
       break;
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S2_FLOAT:
-      SlidingwindowConv3x3<float, float>(param);
+      SlidingwindowConv3x3<float, float>(param, nullptr, false, false);
       break;
     default:
       PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
                                     param.ExecMode());
   }
 
-  if (param.Bias()->dims() == param.Output()->dims()) {
-    math::AddElememtWise<RELU>(param.Output(), param.Bias(), param.Axis(),
-                               param.Output());
-  } else {
-    math::AddChannelWise<RELU>(param.Output(), param.Bias(), param.Output());
+  if (!fusion_has_been_computed) {
+    if (param.Bias()->dims() == param.Output()->dims()) {
+      math::AddElememtWise<RELU>(param.Output(), param.Bias(), param.Axis(),
+                                 param.Output());
+    } else {
+      math::AddChannelWise<RELU>(param.Output(), param.Bias(), param.Output());
+    }
   }
 }
diff --git a/src/operators/kernel/arm/convolution/conv_bn_add_relu_kernel.cpp b/src/operators/kernel/arm/convolution/conv_bn_add_relu_kernel.cpp
index 6df3cb7f1bd19b7551481a0c7fe312648bc454b2..138e34d78eccb3e38eabc54323b4172fe0f47876 100644
--- a/src/operators/kernel/arm/convolution/conv_bn_add_relu_kernel.cpp
+++ b/src/operators/kernel/arm/convolution/conv_bn_add_relu_kernel.cpp
@@ -65,11 +65,11 @@ void ConvBNAddReluKernel<CPU, float>::Compute(
       GemmConv<float, float>(param);
       break;
     case ConvParam<CPU>::EXEC_GEMM1x1s1_FLOAT:
-      GemmConv1x1s1<float, float>(param);
+      GemmConv1x1s1<float, float>(param, nullptr, false, false);
       break;
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S2_FLOAT:
-      SlidingwindowConv3x3<float, float>(param);
+      SlidingwindowConv3x3<float, float>(param, nullptr, false, false);
       break;
     default:
       PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
diff --git a/src/operators/kernel/arm/convolution/conv_bn_relu_kernel.cpp b/src/operators/kernel/arm/convolution/conv_bn_relu_kernel.cpp
index 2f0387125f98b3b67aabc62582375fefa2a105b9..f217902bf2c3d6fa4f702f9589b0cadf683bd566 100644
--- a/src/operators/kernel/arm/convolution/conv_bn_relu_kernel.cpp
+++ b/src/operators/kernel/arm/convolution/conv_bn_relu_kernel.cpp
@@ -16,9 +16,12 @@ limitations under the License. */
 
 #include "operators/kernel/conv_bn_relu_kernel.h"
 #include <cmath>
+#include "framework/context.h"
 #include "operators/kernel/arm/convolution/conv_common.h"
 #include "operators/kernel/central-arm-func/conv_arm_func.h"
 #include "operators/math/element_wise.h"
+#include "operators/math/gemm/gemm1x1s1.h"
+#include "operators/math/slidingwindow_utils.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -57,12 +60,50 @@ bool ConvBNReluKernel<CPU, float>::Init(FusionConvBNReluParam<CPU> *param) {
   param->SetNewBias(new_bias);
 
   InitBaseConvKernel(param);
+
+  switch (param->ExecMode()) {
+    case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
+    case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S2_FLOAT:
+      use_slidingwindow_bn_relu = true;
+      break;
+    case ConvParam<CPU>::EXEC_GEMM1x1s1_FLOAT:
+      use_gemm_bn_relu = true;
+      break;
+  }
+
+  if (use_gemm_bn_relu || use_slidingwindow_bn_relu) {
+    auto filter_data = param->Filter()->data<float>();
+    auto filter_dim = param->Filter()->dims();
+    int len = 1;
+    for (int i = 0; i < filter_dim.size(); i++) {
+      len *= filter_dim[i];
+    }
+    int batch = filter_dim[0];
+    int step = len / batch;
+    for (int i = 0; i < batch; i++) {
+      for (int k = 0; k < step; k++) {
+        filter_data[i * step + k] =
+            filter_data[i * step + k] * new_scale_ptr[i];
+      }
+    }
+    if (use_gemm_bn_relu) {
+      ARMArch arch = framework::CPUContext::Context()->get_arch();
+      math::gemm1x1s1_transform_weight(*param->Filter(), *param->Output(),
+                                       param->transformed_filter_,
+                                       param->groups, arch);
+    }
+    if (use_slidingwindow_bn_relu) {
+      math::slidingwindow_transform_weight<float>(*param->Filter(),
+                                                  param->transformed_filter_);
+    }
+  }
   return true;
 }
 
 template <>
 void ConvBNReluKernel<CPU, float>::Compute(
     const FusionConvBNReluParam<CPU> &param) {
+  bool fusion_has_been_computed = false;
   switch (param.ExecMode()) {
     case ConvParam<CPU>::EXEC_DEPTHWISE3x3S1_FLOAT:
     case ConvParam<CPU>::EXEC_DEPTHWISE3x3S2_FLOAT:
@@ -78,18 +119,24 @@ void ConvBNReluKernel<CPU, float>::Compute(
       GemmConv<float, float>(param);
       break;
     case ConvParam<CPU>::EXEC_GEMM1x1s1_FLOAT:
-      GemmConv1x1s1<float, float>(param);
+      GemmConv1x1s1<float, float>(param, param.NewBias()->data<float>(), true,
+                                  true);
+      fusion_has_been_computed = true;
       break;
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S2_FLOAT:
-      SlidingwindowConv3x3<float, float>(param);
+      SlidingwindowConv3x3<float, float>(param, param.NewBias()->data<float>(),
+                                         true, true);
+      fusion_has_been_computed = true;
       break;
     default:
       PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
                                     param.ExecMode());
   }
-  math::ScaleAddChannelWise<RELU>(param.Output(), param.NewScale(),
-                                  param.NewBias(), param.Output());
+  if (!fusion_has_been_computed) {
+    math::ScaleAddChannelWise<RELU>(param.Output(), param.NewScale(),
+                                    param.NewBias(), param.Output());
+  }
 }
 
 template class ConvBNReluKernel<CPU, float>;
diff --git a/src/operators/kernel/arm/convolution/conv_kernel.cpp b/src/operators/kernel/arm/convolution/conv_kernel.cpp
index 7a3e8471310fef451e15afbe967b692bf15c87fa..f5dc35cdf60fb58999a6dde8abb696d92936eb7b 100644
--- a/src/operators/kernel/arm/convolution/conv_kernel.cpp
+++ b/src/operators/kernel/arm/convolution/conv_kernel.cpp
@@ -55,11 +55,11 @@ void ConvKernel<CPU, float>::Compute(const ConvParam<CPU> &param) {
       GemmConv<float, float>(param);
       break;
     case ConvParam<CPU>::EXEC_GEMM1x1s1_FLOAT:
-      GemmConv1x1s1<float, float>(param);
+      GemmConv1x1s1<float, float>(param, nullptr, false, false);
       break;
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S2_FLOAT:
-      SlidingwindowConv3x3<float, float>(param);
+      SlidingwindowConv3x3<float, float>(param, nullptr, false, false);
       break;
     default:
       PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
diff --git a/src/operators/kernel/arm/convolution/conv_relu_kernel.cpp b/src/operators/kernel/arm/convolution/conv_relu_kernel.cpp
index c9c42639b7f70a35f27f70f77fdb5a38e955972d..477bd55e553f6ba630525877c576ea82269add9e 100644
--- a/src/operators/kernel/arm/convolution/conv_relu_kernel.cpp
+++ b/src/operators/kernel/arm/convolution/conv_relu_kernel.cpp
@@ -46,11 +46,11 @@ void ConvReluKernel<CPU, float>::Compute(
       GemmConv<float, float>(param);
       break;
     case ConvParam<CPU>::EXEC_GEMM1x1s1_FLOAT:
-      GemmConv1x1s1<float, float>(param);
+      GemmConv1x1s1<float, float>(param, nullptr, false, false);
       break;
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
     case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S2_FLOAT:
-      SlidingwindowConv3x3<float, float>(param);
+      SlidingwindowConv3x3<float, float>(param, nullptr, false, false);
       break;
     default:
       PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
diff --git a/src/operators/kernel/arm/convolution/dwconv_bn_relu_kernel.cpp b/src/operators/kernel/arm/convolution/dwconv_bn_relu_kernel.cpp
index 7b5bf86038ccffe0a0c6922dba687754202ec7e3..0eefeae1d1d2974c761b03541a70017f0c48d64c 100644
--- a/src/operators/kernel/arm/convolution/dwconv_bn_relu_kernel.cpp
+++ b/src/operators/kernel/arm/convolution/dwconv_bn_relu_kernel.cpp
@@ -77,7 +77,7 @@ void DWConvBNReluKernel<CPU, float>::Compute(
       GemmConv<float, float>(param);
       break;
     case ConvParam<CPU>::EXEC_GEMM1x1s1_FLOAT:
-      GemmConv1x1s1<float, float>(param);
+      GemmConv1x1s1<float, float>(param, nullptr, false, false);
       break;
     default:
       PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
diff --git a/src/operators/kernel/central-arm-func/conv_arm_func.cpp b/src/operators/kernel/central-arm-func/conv_arm_func.cpp
index 20065f46f3164ef38ae273a359fa78998863994d..43bdbd532a8ee2d615c4ef26d9fd1e7a37edf62d 100644
--- a/src/operators/kernel/central-arm-func/conv_arm_func.cpp
+++ b/src/operators/kernel/central-arm-func/conv_arm_func.cpp
@@ -140,7 +140,8 @@ void GemmConv(const ConvParam<CPU> &param) {
 }
 
 template <typename Itype, typename Otype>
-void GemmConv1x1s1(const ConvParam<CPU> &param) {
+void GemmConv1x1s1(const ConvParam<CPU> &param, const float *bias, bool is_bias,
+                   bool is_relu) {
   const Tensor *input = param.Input();
   Tensor filter = *param.transformed_filter_;
   Tensor *output = param.Output();
@@ -156,8 +157,6 @@ void GemmConv1x1s1(const ConvParam<CPU> &param, const float *bias, bool is_bias,
   const int hout = output->dims()[2];
   const int wout = output->dims()[3];
   const float *weights = filter.mutable_data<float>();
-  const float *bias = nullptr;
-
   int channel_size_out = wout * hout;
   int channel_size_in = win * hin;
   const int group = param.Groups();
@@ -165,8 +164,16 @@ void GemmConv1x1s1(const ConvParam<CPU> &param, const float *bias, bool is_bias,
   const int n = hout * wout;
   const int k = chin / group;
 
-  bool flag_relu = false;
-  bool flag_bias = false;
+  bool flag_relu = true;
+  bool flag_bias = true;
+
+  if (!is_bias) {
+    bias = nullptr;
+    flag_bias = false;
+  }
+  if (!is_relu) {
+    flag_relu = false;
+  }
 
   ARMArch arch = framework::CPUContext::Context()->get_arch();
   int hblock = math::get_hblock(arch);
@@ -322,7 +329,8 @@ void DepthwiseConv5x5(const ConvParam<CPU> &param) {
 }
 
 template <typename Itype, typename Otype>
-void SlidingwindowConv3x3(const ConvParam<CPU> &param) {
+void SlidingwindowConv3x3(const ConvParam<CPU> &param, const float *bias,
+                          bool is_bias, bool is_relu) {
   const Tensor *input = param.Input();
   const Tensor *filter = param.Filter();
   const std::vector<int> &paddings = param.Paddings();
@@ -334,23 +342,29 @@ void SlidingwindowConv3x3(const ConvParam<CPU> &param, const float *bias,
     // math::SlidingwindowConv3x3s1<float, float>(input, filter, paddings,
     // output);
     math::SlidingwindowConv3x3s1Faster<float, float>(
-        input, param.transformed_filter_, paddings, output);
+        input, param.transformed_filter_, paddings, output, bias, is_bias,
+        is_relu);
   } else if (strides[0] == 2) {
     // math::SlidingwindowConv3x3s2<float, float>(input, filter, paddings,
     // output);
     math::SlidingwindowConv3x3s2Faster<float, float>(
-        input, param.transformed_filter_, paddings, output);
+        input, param.transformed_filter_, paddings, output, bias, is_bias,
+        is_relu);
   } else {
     GemmConv<float, float>(param);
   }
 }
 
 template void GemmConv<float, float>(const ConvParam<CPU> &param);
-template void GemmConv1x1s1<float, float>(const ConvParam<CPU> &param);
+template void GemmConv1x1s1<float, float>(const ConvParam<CPU> &param,
+                                          const float *bias, bool is_bias,
+                                          bool is_relu);
 template void WinogradConv3x3<8, 3>(const ConvParam<CPU> &param);
 template void DepthwiseConv3x3<float, float>(const ConvParam<CPU> &param);
 template void DepthwiseConv5x5<float, float>(const ConvParam<CPU> &param);
-template void SlidingwindowConv3x3<float, float>(const ConvParam<CPU> &param);
+template void SlidingwindowConv3x3<float, float>(const ConvParam<CPU> &param,
+                                                 const float *bias,
+                                                 bool is_bias, bool is_relu);
 template void GemmConv<int8_t, int32_t>(const ConvParam<CPU> &param);
 
 #ifndef __aarch64__
diff --git a/src/operators/kernel/central-arm-func/conv_arm_func.h b/src/operators/kernel/central-arm-func/conv_arm_func.h
index f2c1070fa0f5e11f8f92cef7f8089ada00b73216..89b91f9d11de781993e357117e4188429513960f 100644
--- a/src/operators/kernel/central-arm-func/conv_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_arm_func.h
@@ -33,7 +33,8 @@ template <typename Itype, typename Otype>
 void GemmConv(const ConvParam<CPU> &param);
 
 template <typename Itype, typename Otype>
-void GemmConv1x1s1(const ConvParam<CPU> &param);
+void GemmConv1x1s1(const ConvParam<CPU> &param, const float *bias, bool is_bias,
+                   bool is_relu);
 
 template <int tile, int kernel>
 void WinogradConv3x3(const ConvParam<CPU> &param);
@@ -45,7 +46,8 @@ template <typename Itype, typename Otype>
 void DepthwiseConv5x5(const ConvParam<CPU> &param);
 
 template <typename Itype, typename Otype>
-void SlidingwindowConv3x3(const ConvParam<CPU> &param);
+void SlidingwindowConv3x3(const ConvParam<CPU> &param, const float *bias,
+                          bool is_bias, bool is_relu);
 
 void FasterDepthwiseConv3x3_bias_relu(const ConvParam<CPU> &param,
                                       const float *bias, bool flag_relu);
diff --git a/src/operators/kernel/conv_add_bn_relu_kernel.h b/src/operators/kernel/conv_add_bn_relu_kernel.h
index d7ec6d2933e6f3ecfafd28e18a5b9b7633399e8d..2174a6f12571abd0fe8dbe71d693d1a77493a531 100644
--- a/src/operators/kernel/conv_add_bn_relu_kernel.h
+++ b/src/operators/kernel/conv_add_bn_relu_kernel.h
@@ -39,6 +39,8 @@ class ConvAddBNReluKernel
 
  private:
   bool could_use_faster_depthwise_conv_ = false;
+  bool use_gemm_add_bn_relu = false;
+  bool use_slidingwindow_add_bn_relu = false;
 };
 
 }  // namespace operators
diff --git a/src/operators/kernel/conv_bn_relu_kernel.h b/src/operators/kernel/conv_bn_relu_kernel.h
index f63b61ab09f90c8c40738cbe94ec6ebcff9420ff..aef735a524f49d7a62465c1f235b5b843c4117d1 100644
--- a/src/operators/kernel/conv_bn_relu_kernel.h
+++ b/src/operators/kernel/conv_bn_relu_kernel.h
@@ -36,6 +36,10 @@ class ConvBNReluKernel
  public:
   void Compute(const FusionConvBNReluParam<CPU> &param);
   bool Init(FusionConvBNReluParam<CPU> *param);
+
+ private:
+  bool use_gemm_bn_relu = false;
+  bool use_slidingwindow_bn_relu = false;
 };
 
 }  // namespace operators
diff --git a/src/operators/math/gemm/gemm1x1s1.cpp b/src/operators/math/gemm/gemm1x1s1.cpp
index 4beae5833b17df00fb78090fbfae9b02cf77d495..bfc24827659f5963ce903c6711a026eba1ec061e 100644
--- a/src/operators/math/gemm/gemm1x1s1.cpp
+++ b/src/operators/math/gemm/gemm1x1s1.cpp
@@ -18,7 +18,6 @@ limitations under the License. */
 
 #include "operators/math/gemm/gemm1x1s1.h"
 #include <arm_neon.h>
 #include "framework/context.h"
-#include "iostream"
 
 namespace paddle_mobile {
 namespace operators {
diff --git a/src/operators/math/slidingwindow_conv3x3.cpp b/src/operators/math/slidingwindow_conv3x3.cpp
index 0452a290275d72acc80e193c41c3bb0e3ffc5ff0..0f4fbcbd9350a0d0d70dab0eb6ca41e4d5684e1d 100644
--- a/src/operators/math/slidingwindow_conv3x3.cpp
+++ b/src/operators/math/slidingwindow_conv3x3.cpp
@@ -3710,12 +3710,15 @@ void SlidingwindowConv3x3s2(const framework::Tensor *input,
 template <>
 void SlidingwindowConv3x3s1Faster<float, float>(
     const framework::Tensor *input, framework::Tensor *filter,
-    const std::vector<int> &paddings, framework::Tensor *output) {
+    const std::vector<int> &paddings, framework::Tensor *output,
+    const float *bias, bool is_bias, bool is_relu) {
   const float *din = input->data<float>();
   float *dout = output->mutable_data<float>();
   const float *weights = filter->mutable_data<float>();
-  const float *bias = nullptr;
-  bool relu = false;
+  if (!is_bias) {
+    bias = nullptr;
+  }
+  bool relu = is_relu;
   const int num = input->dims()[0];
   const int chin = input->dims()[1];
   const int hin = input->dims()[2];
@@ -4623,12 +4626,15 @@
 template <>
 void SlidingwindowConv3x3s2Faster<float, float>(
     const framework::Tensor *input, framework::Tensor *filter,
-    const std::vector<int> &paddings, framework::Tensor *output) {
+    const std::vector<int> &paddings, framework::Tensor *output,
+    const float *bias, bool is_bias, bool is_relu) {
   const float *din = input->data<float>();
   float *dout = output->mutable_data<float>();
   const float *weights = filter->mutable_data<float>();
-  const float *bias = nullptr;
-  bool relu = false;
+  if (!is_bias) {
+    bias = nullptr;
+  }
+  bool relu = is_relu;
   const int num = input->dims()[0];
   const int chin = input->dims()[1];
   const int hin = input->dims()[2];
diff --git a/src/operators/math/slidingwindow_conv3x3.h b/src/operators/math/slidingwindow_conv3x3.h
index 9ef8fd2b3fff4c449eea3b41013862dd76c5d3c0..8bdd682cdb3075767fd2ed2119ebf22b7158da8a 100644
--- a/src/operators/math/slidingwindow_conv3x3.h
+++ b/src/operators/math/slidingwindow_conv3x3.h
@@ -37,13 +37,15 @@ template <typename Itype, typename Otype>
 void SlidingwindowConv3x3s1Faster(const framework::Tensor *input,
                                   framework::Tensor *filter,
                                   const std::vector<int> &paddings,
-                                  framework::Tensor *output);
+                                  framework::Tensor *output, const float *bias,
+                                  bool is_bias, bool is_relu);
 
 template <typename Itype, typename Otype>
 void SlidingwindowConv3x3s2Faster(const framework::Tensor *input,
                                   framework::Tensor *filter,
                                   const std::vector<int> &paddings,
-                                  framework::Tensor *output);
+                                  framework::Tensor *output, const float *bias,
+                                  bool is_bias, bool is_relu);
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
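
Note on the Compute() changes: each fused kernel now records whether the selected backend already folded the bias add (and ReLU) into the convolution itself, and runs the elementwise epilogue only when it did not. The standalone sketch below illustrates that guard in isolation; run_conv_fused, add_bias_relu, and the Tensor alias are hypothetical stand-ins for this illustration, not paddle-mobile APIs.

#include <algorithm>
#include <cstdio>
#include <vector>

using Tensor = std::vector<float>;

// A conv backend that can fold bias + ReLU into its output loop. Returns
// true to signal "fusion already applied" (this mirrors the role of
// fusion_has_been_computed in the patch).
bool run_conv_fused(const Tensor &in, Tensor *out, const float *bias,
                    bool is_bias, bool is_relu) {
  for (size_t i = 0; i < in.size(); ++i) {
    float v = in[i] * 2.0f;             // stand-in for the convolution
    if (is_bias) v += bias[0];          // fused bias add
    if (is_relu) v = std::max(v, 0.f);  // fused ReLU
    (*out)[i] = v;
  }
  return is_bias || is_relu;
}

// The elementwise epilogue that the fused path makes redundant.
void add_bias_relu(Tensor *out, const float *bias) {
  for (float &v : *out) v = std::max(v + bias[0], 0.f);
}

int main() {
  Tensor in = {-1.f, 0.5f, 2.f}, out(in.size());
  float bias = 0.25f;
  // Fused path: the epilogue must be skipped, or bias/ReLU run twice.
  bool fusion_has_been_computed =
      run_conv_fused(in, &out, &bias, /*is_bias=*/true, /*is_relu=*/true);
  if (!fusion_has_been_computed) {
    add_bias_relu(&out, &bias);
  }
  for (float v : out) std::printf("%g ", v);  // prints: 0 1.25 4.25
  std::printf("\n");
}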
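
Note on the Init() changes: before transforming the filter for the GEMM or sliding-window path, both BN-fusing kernels multiply every weight in each output channel's filter slice by that channel's precomputed batch-norm scale (new_scale_ptr[i] = gamma[i] / sqrt(var[i] + epsilon)), so no separate BN pass is needed at inference. A minimal sketch of that folding, assuming an OIHW-style layout; fold_bn_scale and the flat vector layout are illustrative assumptions, not paddle-mobile API.

#include <cstdio>
#include <vector>

// Fold a per-output-channel BN scale into conv filter weights, as the
// Init() hunks above do in place on filter_data.
void fold_bn_scale(std::vector<float> *filter, int out_channels,
                   const std::vector<float> &new_scale) {
  // One contiguous filter "slice" per output channel (step = len / batch).
  int step = static_cast<int>(filter->size()) / out_channels;
  for (int i = 0; i < out_channels; ++i) {
    for (int k = 0; k < step; ++k) {
      (*filter)[i * step + k] *= new_scale[i];
    }
  }
}

int main() {
  // 2 output channels, 2 weights each (4 weights total).
  std::vector<float> filter = {1.f, 2.f, 3.f, 4.f};
  std::vector<float> scale = {0.5f, 2.f};  // precomputed per-channel scales
  fold_bn_scale(&filter, 2, scale);
  for (float w : filter) std::printf("%g ", w);  // prints: 0.5 1 6 8
  std::printf("\n");
}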