diff --git a/src/operators/fusion_conv_add_add_prelu.cpp b/src/operators/fusion_conv_add_add_prelu.cpp index 573b8f763f7ea4da7660dea628b350b0a0aead0f..5104c989415eee46e66bdbf419fc6ecf7a2baa34 100644 --- a/src/operators/fusion_conv_add_add_prelu.cpp +++ b/src/operators/fusion_conv_add_add_prelu.cpp @@ -18,33 +18,33 @@ limitations under the License. */ #include "operators/math/conv_func.h" namespace paddle_mobile { - namespace operators { - - template - void FusionConvAddAddPReluOp::InferShape() const { - auto in_dims = this->param_.Input()->dims(); - auto filter_dims = this->param_.Filter()->dims(); - const std::vector &strides = this->param_.Strides(); - std::vector paddings = this->param_.Paddings(); - int groups = this->param_.Groups(); - std::vector dilations = this->param_.Dilations(); - - PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() && - dilations.size() == paddings.size() && - paddings.size() == strides.size()), - "ConvParam is not suitable"); - - std::vector output_shape({in_dims[0], filter_dims[0]}); - for (size_t i = 0; i < strides.size(); ++i) { - output_shape.push_back( - math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], - paddings[i], strides[i])); - } - framework::DDim ddim = framework::make_ddim(output_shape); - this->param_.Output()->Resize(ddim); - } - - } // namespace operators +namespace operators { + +template +void FusionConvAddAddPReluOp::InferShape() const { + auto in_dims = this->param_.Input()->dims(); + auto filter_dims = this->param_.Filter()->dims(); + const std::vector &strides = this->param_.Strides(); + std::vector paddings = this->param_.Paddings(); + int groups = this->param_.Groups(); + std::vector dilations = this->param_.Dilations(); + + PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() && + dilations.size() == paddings.size() && + paddings.size() == strides.size()), + "ConvParam is not suitable"); + + std::vector output_shape({in_dims[0], filter_dims[0]}); + for (size_t i = 0; i < strides.size(); ++i) { + output_shape.push_back( + math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], + paddings[i], strides[i])); + } + framework::DDim ddim = framework::make_ddim(output_shape); + this->param_.Output()->Resize(ddim); +} + +} // namespace operators } // namespace paddle_mobile namespace ops = paddle_mobile::operators; diff --git a/src/operators/fusion_conv_add_add_prelu_op.h b/src/operators/fusion_conv_add_add_prelu_op.h index 8aa51872f4d285fe7541bf9a49ed59dbe9d26136..d91b4d28d728efb4ecf817294f37e67ac19cfe72 100644 --- a/src/operators/fusion_conv_add_add_prelu_op.h +++ b/src/operators/fusion_conv_add_add_prelu_op.h @@ -24,62 +24,64 @@ limitations under the License. */ #include "operators/op_param.h" namespace paddle_mobile { - namespace operators { - - class FusionConvAddAddPReluOpMatcher : public framework::FusionOpMatcher { - public: - FusionConvAddAddPReluOpMatcher() { - node_ = framework::Node(G_OP_TYPE_CONV); - node_ > std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD) > - std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD) - > std::make_shared(G_OP_TYPE_PRELU); - } - - void FolderNodes( - framework::Node *node, - std::vector> *removed_nodes) { - node->Folder(node_.Depth(), Type(), - {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}, {"Out", "addOut"},{"X", "addX"}}}, - {G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}} - }, - - removed_nodes); - } - std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU; } - - std::vector> NeedCheck() { - DLOG << " conv add add prelu check add X "; - return {{2, "Y"}, {2, "X"}}; - } - }; - - template - class FusionConvAddAddPReluOp : public framework::OperatorWithKernel< - DeviceType, FusionConvAddAddPReluParam, - operators::ConvAddAddPReluKernel> { - public: - FusionConvAddAddPReluOp(const string &type, const VariableNameMap &inputs, - const VariableNameMap &outputs, - const framework::AttributeMap &attrs, - std::shared_ptr scope) - : framework::OperatorWithKernel< - DeviceType, FusionConvAddAddPReluParam, - operators::ConvAddAddPReluKernel>(type, inputs, outputs, - attrs, scope) {} - - using framework::OperatorWithKernel< - DeviceType, FusionConvAddAddPReluParam, - operators::ConvAddAddPReluKernel>::OperatorWithKernel; - void InferShape() const override; - protected: - }; +namespace operators { + +class FusionConvAddAddPReluOpMatcher : public framework::FusionOpMatcher { + public: + FusionConvAddAddPReluOpMatcher() { + node_ = framework::Node(G_OP_TYPE_CONV); + node_ > std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD) > + std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD) > + std::make_shared(G_OP_TYPE_PRELU); + } + + void FolderNodes( + framework::Node *node, + std::vector> *removed_nodes) { + node->Folder(node_.Depth(), Type(), + {{G_OP_TYPE_ELEMENTWISE_ADD, + {{"Y", "Y"}, {"Out", "addOut"}, {"X", "addX"}}}, + {G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}}, + + removed_nodes); + } + std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU; } + + std::vector> NeedCheck() { + DLOG << " conv add add prelu check add X "; + return {{2, "Y"}, {2, "X"}}; + } +}; + +template +class FusionConvAddAddPReluOp + : public framework::OperatorWithKernel< + DeviceType, FusionConvAddAddPReluParam, + operators::ConvAddAddPReluKernel> { + public: + FusionConvAddAddPReluOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel< + DeviceType, FusionConvAddAddPReluParam, + operators::ConvAddAddPReluKernel>( + type, inputs, outputs, attrs, scope) {} + + using framework::OperatorWithKernel< + DeviceType, FusionConvAddAddPReluParam, + operators::ConvAddAddPReluKernel>::OperatorWithKernel; + void InferShape() const override; + + protected: +}; #ifdef PADDLE_MOBILE_CPU #ifndef CONV_ADD_ADD_PRELU_REGISTER #define CONV_ADD_ADD_PRELU_REGISTER - static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar( - new FusionConvAddAddPReluOpMatcher()); +static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar( + new FusionConvAddAddPReluOpMatcher()); #endif #endif @@ -87,7 +89,7 @@ namespace paddle_mobile { #endif #ifdef PADDLE_MOBILE_FPGA - #ifndef CONV_ADD_ADD_PRELU_REGISTER +#ifndef CONV_ADD_ADD_PRELU_REGISTER #define CONV_ADD_ADD_PRELU_REGISTER static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar( new FusionConvAddAddPReluOpMatcher()); @@ -95,7 +97,7 @@ static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar( #endif - } // namespace operators +} // namespace operators } // namespace paddle_mobile #ifdef PADDLE_MOBILE_CPU diff --git a/src/operators/fusion_conv_add_prelu_op.cpp b/src/operators/fusion_conv_add_prelu_op.cpp index 257ad62d0b517112a4f4c5dfcdc2030d30a2a71d..0cd30ae6888cd2372b0968717de14f9ca3c72e18 100644 --- a/src/operators/fusion_conv_add_prelu_op.cpp +++ b/src/operators/fusion_conv_add_prelu_op.cpp @@ -18,38 +18,38 @@ limitations under the License. */ #include "operators/math/conv_func.h" namespace paddle_mobile { - namespace operators { - - template - void FusionConvAddPReluOp::InferShape() const { - auto in_dims = this->param_.Input()->dims(); - auto filter_dims = this->param_.Filter()->dims(); - const std::vector &strides = this->param_.Strides(); - std::vector paddings = this->param_.Paddings(); - int groups = this->param_.Groups(); - std::vector dilations = this->param_.Dilations(); - - PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() && - dilations.size() == paddings.size() && - paddings.size() == strides.size()), - "ConvParam is not suitable"); - - std::vector output_shape({in_dims[0], filter_dims[0]}); - for (size_t i = 0; i < strides.size(); ++i) { - output_shape.push_back( - math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], - paddings[i], strides[i])); - } - framework::DDim ddim = framework::make_ddim(output_shape); - this->param_.Output()->Resize(ddim); - } - - } // namespace operators +namespace operators { + +template +void FusionConvAddPReluOp::InferShape() const { + auto in_dims = this->param_.Input()->dims(); + auto filter_dims = this->param_.Filter()->dims(); + const std::vector &strides = this->param_.Strides(); + std::vector paddings = this->param_.Paddings(); + int groups = this->param_.Groups(); + std::vector dilations = this->param_.Dilations(); + + PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() && + dilations.size() == paddings.size() && + paddings.size() == strides.size()), + "ConvParam is not suitable"); + + std::vector output_shape({in_dims[0], filter_dims[0]}); + for (size_t i = 0; i < strides.size(); ++i) { + output_shape.push_back( + math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], + paddings[i], strides[i])); + } + framework::DDim ddim = framework::make_ddim(output_shape); + this->param_.Output()->Resize(ddim); +} + +} // namespace operators } // namespace paddle_mobile namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU -REGISTER_OPERATOR_CPU(fusion_conv_add_prelu,ops::FusionConvAddPReluOp); +REGISTER_OPERATOR_CPU(fusion_conv_add_prelu, ops::FusionConvAddPReluOp); #endif #ifdef PADDLE_MOBILE_MALI_GPU #endif diff --git a/src/operators/fusion_conv_add_prelu_op.h b/src/operators/fusion_conv_add_prelu_op.h index fd005120633951e6b44df45d66e60d0a04ff7783..4c968be68230fe6252e72655f47b2a347f720526 100644 --- a/src/operators/fusion_conv_add_prelu_op.h +++ b/src/operators/fusion_conv_add_prelu_op.h @@ -24,59 +24,59 @@ limitations under the License. */ #include "operators/op_param.h" namespace paddle_mobile { - namespace operators { - - class FusionConvAddPReluOpMatcher : public framework::FusionOpMatcher { - public: - FusionConvAddPReluOpMatcher() { - node_ = framework::Node(G_OP_TYPE_CONV); - node_ > std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD) > - std::make_shared(G_OP_TYPE_PRELU); - } - - void FolderNodes( - framework::Node *node, - std::vector> *removed_nodes) { - node->Folder(node_.Depth(), Type(), - {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}, - {G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}} - - }, - - - removed_nodes); - } - std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_PRELU; } - }; - - template - class FusionConvAddPReluOp : public framework::OperatorWithKernel< - DeviceType, FusionConvAddPReluParam, - operators::ConvAddPReluKernel> { - public: - FusionConvAddPReluOp(const string &type, const VariableNameMap &inputs, - const VariableNameMap &outputs, - const framework::AttributeMap &attrs, - std::shared_ptr scope) - : framework::OperatorWithKernel< - DeviceType, FusionConvAddPReluParam, - operators::ConvAddPReluKernel>(type, inputs, outputs, - attrs, scope) {} - - using framework::OperatorWithKernel< - DeviceType, FusionConvAddPReluParam, - operators::ConvAddPReluKernel>::OperatorWithKernel; - void InferShape() const override; - - protected: - }; +namespace operators { + +class FusionConvAddPReluOpMatcher : public framework::FusionOpMatcher { + public: + FusionConvAddPReluOpMatcher() { + node_ = framework::Node(G_OP_TYPE_CONV); + node_ > std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD) > + std::make_shared(G_OP_TYPE_PRELU); + } + + void FolderNodes( + framework::Node *node, + std::vector> *removed_nodes) { + node->Folder(node_.Depth(), Type(), + {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}, + {G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}} + + }, + + removed_nodes); + } + std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_PRELU; } +}; + +template +class FusionConvAddPReluOp + : public framework::OperatorWithKernel< + DeviceType, FusionConvAddPReluParam, + operators::ConvAddPReluKernel> { + public: + FusionConvAddPReluOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel< + DeviceType, FusionConvAddPReluParam, + operators::ConvAddPReluKernel>(type, inputs, outputs, + attrs, scope) {} + + using framework::OperatorWithKernel< + DeviceType, FusionConvAddPReluParam, + operators::ConvAddPReluKernel>::OperatorWithKernel; + void InferShape() const override; + + protected: +}; #ifdef PADDLE_MOBILE_CPU #ifndef CONV_ADD_PRELU_REGISTER #define CONV_ADD_PRELU_REGISTER - static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar( - new FusionConvAddPReluOpMatcher()); +static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar( + new FusionConvAddPReluOpMatcher()); #endif #endif @@ -84,7 +84,7 @@ namespace paddle_mobile { #endif #ifdef PADDLE_MOBILE_FPGA - #ifndef CONV_ADD_PRELU_REGISTER +#ifndef CONV_ADD_PRELU_REGISTER #define CONV_ADD_PRELU_REGISTER static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar( new FusionConvAddPReluOpMatcher()); @@ -92,7 +92,7 @@ static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar( #endif - } // namespace operators +} // namespace operators } // namespace paddle_mobile #ifdef PADDLE_MOBILE_CPU diff --git a/src/operators/kernel/arm/conv_add_add_prelu_kernel.cpp b/src/operators/kernel/arm/conv_add_add_prelu_kernel.cpp index 5f83895f4bea96f67fb7f00cb980fdb0da5a1ec0..74b88f5d4f5e24b1401803c8c48d99319f412d1b 100644 --- a/src/operators/kernel/arm/conv_add_add_prelu_kernel.cpp +++ b/src/operators/kernel/arm/conv_add_add_prelu_kernel.cpp @@ -18,21 +18,22 @@ limitations under the License. */ #include "operators/kernel/central-arm-func/conv_add_add_prelu_arm_func.h" namespace paddle_mobile { - namespace operators { - - template <> - bool ConvAddAddPReluKernel::Init(FusionConvAddAddPReluParam *param) { - return true; - } - - template <> - void ConvAddAddPReluKernel::Compute( - const FusionConvAddAddPReluParam ¶m) const { - ConvAddAddPReluCompute(param); - } - template class ConvAddAddPReluKernel; - - } // namespace operators +namespace operators { + +template <> +bool ConvAddAddPReluKernel::Init( + FusionConvAddAddPReluParam *param) { + return true; +} + +template <> +void ConvAddAddPReluKernel::Compute( + const FusionConvAddAddPReluParam ¶m) const { + ConvAddAddPReluCompute(param); +} +template class ConvAddAddPReluKernel; + +} // namespace operators } // namespace paddle_mobile #endif diff --git a/src/operators/kernel/arm/conv_add_prelu_kernel.cpp b/src/operators/kernel/arm/conv_add_prelu_kernel.cpp index 0a2fe4d6b98ec0c30250003f90f8f2fc31d3cc51..5930cfdcfc0f983c9f07754113dc37d5122d19f0 100644 --- a/src/operators/kernel/arm/conv_add_prelu_kernel.cpp +++ b/src/operators/kernel/arm/conv_add_prelu_kernel.cpp @@ -18,21 +18,21 @@ limitations under the License. */ #include "operators/kernel/central-arm-func/conv_add_prelu_arm_func.h" namespace paddle_mobile { - namespace operators { +namespace operators { - template <> - bool ConvAddPReluKernel::Init(FusionConvAddPReluParam *param) { - return true; - } +template <> +bool ConvAddPReluKernel::Init(FusionConvAddPReluParam *param) { + return true; +} - template <> - void ConvAddPReluKernel::Compute( - const FusionConvAddPReluParam ¶m) const { - ConvAddPReluCompute(param); - } - template class ConvAddPReluKernel; +template <> +void ConvAddPReluKernel::Compute( + const FusionConvAddPReluParam ¶m) const { + ConvAddPReluCompute(param); +} +template class ConvAddPReluKernel; - } // namespace operators +} // namespace operators } // namespace paddle_mobile #endif diff --git a/src/operators/kernel/central-arm-func/conv_add_add_prelu_arm_func.h b/src/operators/kernel/central-arm-func/conv_add_add_prelu_arm_func.h index 42ada2aa28b904914b6f2ba21cf8733a415a9d68..a19c67e68366fc57a305e0dbb955229a763737d9 100644 --- a/src/operators/kernel/central-arm-func/conv_add_add_prelu_arm_func.h +++ b/src/operators/kernel/central-arm-func/conv_add_add_prelu_arm_func.h @@ -23,115 +23,118 @@ limitations under the License. */ #include "operators/op_param.h" namespace paddle_mobile { - namespace operators { - - template - void ConvAddAddPReluCompute(const FusionConvAddAddPReluParam ¶m) { - const Tensor *input = param.Input(); - Tensor filter = *param.Filter(); - Tensor bias = *param.Bias(); - Tensor bias1 = *param.Bias1(); - int axis = param.Axis(); - Tensor *output = param.Output(); - - float *biase_data = bias.data(); - - int groups = param.Groups(); - std::vector strides = param.Strides(); - std::vector paddings = param.Paddings(); - std::vector dilations = param.Dilations(); - Tensor aa = *param.InputAlpha(); - float *p = aa.data(); - - std::string mode = param.Mode(); - const int batch_size = static_cast(input->dims()[0]); - - std::vector filter_shape_vec(framework::vectorize(filter.dims())); - - std::vector output_shape_vec(framework::vectorize(output->dims())); - size_t data_dim = filter_shape_vec.size() - 2; - std::vector col_shape_vec(1 + 2 * data_dim); - col_shape_vec[0] = input->dims()[1] / groups; - for (size_t j = 0; j < data_dim; ++j) { - col_shape_vec[j + 1] = filter_shape_vec[j + 2]; - col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; - } - framework::DDim col_shape(framework::make_ddim(col_shape_vec)); - - framework::DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, data_dim + 1); - - bool is_expand = - math::IsExpand(filter_shape_vec, strides, paddings, dilations); - Tensor col; - Tensor col_matrix; - if (is_expand) { - col.mutable_data(col_shape); - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); - } - - framework::DDim input_shape = framework::slice_ddim( - input->dims(), 1, static_cast(input->dims().size())); - - framework::DDim filter_matrix_shape = {filter.dims()[0], - filter.numel() / filter.dims()[0]}; - filter.Resize(filter_matrix_shape); - framework::DDim output_matrix_shape = { - output->dims()[1], - output->numel() / (output->dims()[0] * output->dims()[1])}; - - // convolution operator: im2col(or vol2col) + gemm - int in_step = static_cast(input->dims()[1]) / groups; - int out_step = static_cast(output->dims()[1]) / groups; - - math::Vol2ColFunctor vol2col; - math::Im2ColFunctor im2col; - - for (int i = 0; i < batch_size; i++) { - Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); - Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); - Tensor bias1_batch = bias1.Slice(i,i+1).Resize(output_matrix_shape); - for (int g = 0; g < groups; g++) { - Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - - if (!is_expand) { - col.ShareDataWith(in_slice); - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); - } else if (data_dim == 2U) { - // im2col - im2col(in_slice, dilations, strides, - std::vector{paddings[0], paddings[1], paddings[0], - paddings[1]}, - &col); - } else if (data_dim == 3U) { - // vol2col - vol2col(in_slice, dilations, strides, paddings, &col); - } - - // gemm - Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); - Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); - Tensor bias1_slice = bias1_batch.Slice(g * out_step, (g + 1) * out_step); - float *biase_data1 = bias1_slice.data(); -// int n = bias1_slice.dims()[0]; -// int m = bias1_slice.dims()[1]; -// for(int i=0;i(filter_slice, false, col_matrix, false, -// static_cast(1), &out_slice, -// static_cast(1), true, biase_data); - math::matmulWithPRelu(filter_slice, false, col_matrix, false, - &out_slice, p,mode, biase_data,biase_data1); - } - } - } - - } // namespace operators +namespace operators { + +template +void ConvAddAddPReluCompute(const FusionConvAddAddPReluParam ¶m) { + const Tensor *input = param.Input(); + Tensor filter = *param.Filter(); + Tensor bias = *param.Bias(); + Tensor bias1 = *param.Bias1(); + int axis = param.Axis(); + Tensor *output = param.Output(); + + float *biase_data = bias.data(); + + int groups = param.Groups(); + std::vector strides = param.Strides(); + std::vector paddings = param.Paddings(); + std::vector dilations = param.Dilations(); + Tensor aa = *param.InputAlpha(); + float *p = aa.data(); + + std::string mode = param.Mode(); + const int batch_size = static_cast(input->dims()[0]); + + std::vector filter_shape_vec(framework::vectorize(filter.dims())); + + std::vector output_shape_vec(framework::vectorize(output->dims())); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = input->dims()[1] / groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } + framework::DDim col_shape(framework::make_ddim(col_shape_vec)); + + framework::DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, data_dim + 1); + + bool is_expand = + math::IsExpand(filter_shape_vec, strides, paddings, dilations); + Tensor col; + Tensor col_matrix; + if (is_expand) { + col.mutable_data(col_shape); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } + + framework::DDim input_shape = framework::slice_ddim( + input->dims(), 1, static_cast(input->dims().size())); + + framework::DDim filter_matrix_shape = {filter.dims()[0], + filter.numel() / filter.dims()[0]}; + filter.Resize(filter_matrix_shape); + framework::DDim output_matrix_shape = { + output->dims()[1], + output->numel() / (output->dims()[0] * output->dims()[1])}; + + // convolution operator: im2col(or vol2col) + gemm + int in_step = static_cast(input->dims()[1]) / groups; + int out_step = static_cast(output->dims()[1]) / groups; + + math::Vol2ColFunctor vol2col; + math::Im2ColFunctor im2col; + + for (int i = 0; i < batch_size; i++) { + Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); + Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); + Tensor bias1_batch = bias1.Slice(i, i + 1).Resize(output_matrix_shape); + for (int g = 0; g < groups; g++) { + Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); + + if (!is_expand) { + col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (data_dim == 2U) { + // im2col + im2col(in_slice, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); + } else if (data_dim == 3U) { + // vol2col + vol2col(in_slice, dilations, strides, paddings, &col); + } + + // gemm + Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); + Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); + Tensor bias1_slice = bias1_batch.Slice(g * out_step, (g + 1) * out_step); + float *biase_data1 = bias1_slice.data(); + // int n = bias1_slice.dims()[0]; + // int m = bias1_slice.dims()[1]; + // for(int i=0;i(filter_slice, false, col_matrix, + // false, + // static_cast(1), + // &out_slice, + // static_cast(1), true, + // biase_data); + math::matmulWithPRelu(filter_slice, false, col_matrix, false, &out_slice, + p, mode, biase_data, biase_data1); + } + } +} + +} // namespace operators } // namespace paddle_mobile #endif diff --git a/src/operators/kernel/central-arm-func/conv_add_prelu_arm_func.h b/src/operators/kernel/central-arm-func/conv_add_prelu_arm_func.h index 8066c5003cac34d0d22d36d71cac8951d84d4a3e..df63379d967606e15106937534bb82496ee83b4e 100644 --- a/src/operators/kernel/central-arm-func/conv_add_prelu_arm_func.h +++ b/src/operators/kernel/central-arm-func/conv_add_prelu_arm_func.h @@ -23,105 +23,108 @@ limitations under the License. */ #include "operators/op_param.h" namespace paddle_mobile { - namespace operators { - - template - void ConvAddPReluCompute(const FusionConvAddPReluParam ¶m) { - const Tensor *input = param.Input(); - Tensor filter = *param.Filter(); - Tensor bias = *param.Bias(); -// DLOG<<"yangfei"; -// DLOG<(); - - int groups = param.Groups(); - std::vector strides = param.Strides(); - std::vector paddings = param.Paddings(); - std::vector dilations = param.Dilations(); - Tensor aa = *param.InputAlpha(); - float *p = aa.data(); - std::string mode = param.Mode(); - const int batch_size = static_cast(input->dims()[0]); - - std::vector filter_shape_vec(framework::vectorize(filter.dims())); - - std::vector output_shape_vec(framework::vectorize(output->dims())); - size_t data_dim = filter_shape_vec.size() - 2; - std::vector col_shape_vec(1 + 2 * data_dim); - col_shape_vec[0] = input->dims()[1] / groups; - for (size_t j = 0; j < data_dim; ++j) { - col_shape_vec[j + 1] = filter_shape_vec[j + 2]; - col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; - } - framework::DDim col_shape(framework::make_ddim(col_shape_vec)); - - framework::DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, data_dim + 1); - - bool is_expand = - math::IsExpand(filter_shape_vec, strides, paddings, dilations); - Tensor col; - Tensor col_matrix; - if (is_expand) { - col.mutable_data(col_shape); - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); - } - - framework::DDim input_shape = framework::slice_ddim( - input->dims(), 1, static_cast(input->dims().size())); - - framework::DDim filter_matrix_shape = {filter.dims()[0], - filter.numel() / filter.dims()[0]}; - filter.Resize(filter_matrix_shape); - framework::DDim output_matrix_shape = { - output->dims()[1], - output->numel() / (output->dims()[0] * output->dims()[1])}; - - // convolution operator: im2col(or vol2col) + gemm - int in_step = static_cast(input->dims()[1]) / groups; - int out_step = static_cast(output->dims()[1]) / groups; - - math::Vol2ColFunctor vol2col; - math::Im2ColFunctor im2col; - - for (int i = 0; i < batch_size; i++) { - Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); - Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); - - for (int g = 0; g < groups; g++) { - Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - - if (!is_expand) { - col.ShareDataWith(in_slice); - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); - } else if (data_dim == 2U) { - // im2col - im2col(in_slice, dilations, strides, - std::vector{paddings[0], paddings[1], paddings[0], - paddings[1]}, - &col); - } else if (data_dim == 3U) { - // vol2col - vol2col(in_slice, dilations, strides, paddings, &col); - } - - // gemm - Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); - Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); -// math::matmul(filter_slice, false, col_matrix, false, -// static_cast(1), &out_slice, -// static_cast(1), true, biase_data); - math::matmulWithPRelu(filter_slice, false, col_matrix, false, - &out_slice, p,mode, biase_data, nullptr); - } - } - } - - } // namespace operators +namespace operators { + +template +void ConvAddPReluCompute(const FusionConvAddPReluParam ¶m) { + const Tensor *input = param.Input(); + Tensor filter = *param.Filter(); + Tensor bias = *param.Bias(); + // DLOG<<"yangfei"; + // DLOG<(); + + int groups = param.Groups(); + std::vector strides = param.Strides(); + std::vector paddings = param.Paddings(); + std::vector dilations = param.Dilations(); + Tensor aa = *param.InputAlpha(); + float *p = aa.data(); + std::string mode = param.Mode(); + const int batch_size = static_cast(input->dims()[0]); + + std::vector filter_shape_vec(framework::vectorize(filter.dims())); + + std::vector output_shape_vec(framework::vectorize(output->dims())); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = input->dims()[1] / groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } + framework::DDim col_shape(framework::make_ddim(col_shape_vec)); + + framework::DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, data_dim + 1); + + bool is_expand = + math::IsExpand(filter_shape_vec, strides, paddings, dilations); + Tensor col; + Tensor col_matrix; + if (is_expand) { + col.mutable_data(col_shape); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } + + framework::DDim input_shape = framework::slice_ddim( + input->dims(), 1, static_cast(input->dims().size())); + + framework::DDim filter_matrix_shape = {filter.dims()[0], + filter.numel() / filter.dims()[0]}; + filter.Resize(filter_matrix_shape); + framework::DDim output_matrix_shape = { + output->dims()[1], + output->numel() / (output->dims()[0] * output->dims()[1])}; + + // convolution operator: im2col(or vol2col) + gemm + int in_step = static_cast(input->dims()[1]) / groups; + int out_step = static_cast(output->dims()[1]) / groups; + + math::Vol2ColFunctor vol2col; + math::Im2ColFunctor im2col; + + for (int i = 0; i < batch_size; i++) { + Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); + Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); + + for (int g = 0; g < groups; g++) { + Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); + + if (!is_expand) { + col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (data_dim == 2U) { + // im2col + im2col(in_slice, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); + } else if (data_dim == 3U) { + // vol2col + vol2col(in_slice, dilations, strides, paddings, &col); + } + + // gemm + Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); + Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); + // math::matmul(filter_slice, false, col_matrix, + // false, + // static_cast(1), + // &out_slice, + // static_cast(1), true, + // biase_data); + math::matmulWithPRelu(filter_slice, false, col_matrix, false, &out_slice, + p, mode, biase_data, nullptr); + } + } +} + +} // namespace operators } // namespace paddle_mobile #endif diff --git a/src/operators/kernel/conv_add_add_prelu_kernel.h b/src/operators/kernel/conv_add_add_prelu_kernel.h index c41f584d93ddc1c440262b006867577c5eac7211..5715cd46d5a6c7e80ab5ff77ba83c7973e1db811 100644 --- a/src/operators/kernel/conv_add_add_prelu_kernel.h +++ b/src/operators/kernel/conv_add_add_prelu_kernel.h @@ -26,20 +26,20 @@ limitations under the License. */ #include "operators/op_param.h" namespace paddle_mobile { - namespace operators { +namespace operators { - using framework::DDim; - using framework::OpKernelBase; +using framework::DDim; +using framework::OpKernelBase; - template - class ConvAddAddPReluKernel - : public OpKernelBase> { - public: - void Compute(const FusionConvAddAddPReluParam ¶m) const; - bool Init(FusionConvAddAddPReluParam *param); - }; +template +class ConvAddAddPReluKernel + : public OpKernelBase> { + public: + void Compute(const FusionConvAddAddPReluParam ¶m) const; + bool Init(FusionConvAddAddPReluParam *param); +}; - } // namespace operators +} // namespace operators } // namespace paddle_mobile #endif diff --git a/src/operators/kernel/conv_add_prelu_kernel.h b/src/operators/kernel/conv_add_prelu_kernel.h index 8d8f91676d92f6ba2217ebcd22e5dae3b05ad498..a109f84cf09b4d0e2469a1885b902c0f70acc6c8 100644 --- a/src/operators/kernel/conv_add_prelu_kernel.h +++ b/src/operators/kernel/conv_add_prelu_kernel.h @@ -26,20 +26,20 @@ limitations under the License. */ #include "operators/op_param.h" namespace paddle_mobile { - namespace operators { +namespace operators { - using framework::DDim; - using framework::OpKernelBase; +using framework::DDim; +using framework::OpKernelBase; - template - class ConvAddPReluKernel - : public OpKernelBase> { - public: - void Compute(const FusionConvAddPReluParam ¶m) const; - bool Init(FusionConvAddPReluParam *param); - }; +template +class ConvAddPReluKernel + : public OpKernelBase> { + public: + void Compute(const FusionConvAddPReluParam ¶m) const; + bool Init(FusionConvAddPReluParam *param); +}; - } // namespace operators +} // namespace operators } // namespace paddle_mobile #endif diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp index 8e0ca760397f4e7176e0f4dbb730d056fd20e629..c9084a9d807ab43ad60b1661b5b1a7ecc67c8479 100644 --- a/src/operators/math/gemm.cpp +++ b/src/operators/math/gemm.cpp @@ -3172,7 +3172,7 @@ void SgemmWithPRelu_omp(int m, int n, int k, const float *A, int lda, int max_threads = 1; #endif - int L1 = 16 / max_threads * 1024; + int L1 = 32 * 1024; KC = k; if (m > n) { // 对 A 分块 diff --git a/src/operators/math/math_function.cpp b/src/operators/math/math_function.cpp index e48e4eb35b1c533e0befb82e6fd384913a3659c8..c5192441b2e89f4a5346f5d580fe87890becc432 100644 --- a/src/operators/math/math_function.cpp +++ b/src/operators/math/math_function.cpp @@ -110,9 +110,8 @@ void matmulWithPRelu(const framework::Tensor &matrix_a, bool trans_a, int K = (!trans_a) ? dim_a[1] : dim_a[0]; #ifdef _OPENMP - xsSgemmWithPRelu_omp(M, N, K, matrix_a.data(), K, - matrix_b.data(), N, matrix_out->data(), N, - p, mode, bias, bias1); + SgemmWithPRelu_omp(M, N, K, matrix_a.data(), K, matrix_b.data(), + N, matrix_out->data(), N, p, mode, bias, bias1); #else SgemmWithPRelu(M, N, K, matrix_a.data(), K, matrix_b.data(), N, matrix_out->data(), N, p, mode, bias, bias1);