From d4c0395fe4d3ac6f5af0908eb1862fdf0f360a12 Mon Sep 17 00:00:00 2001 From: liuruilong Date: Wed, 20 Jun 2018 17:17:25 +0800 Subject: [PATCH] add conv_add_relu op --- CMakeLists.txt | 10 +- src/common/types.cpp | 3 +- src/operators/conv_op.cpp | 3 +- src/operators/conv_op.h | 7 - src/operators/depthwise_conv_op.cpp | 3 +- src/operators/fusion_conv_add.cpp | 3 +- src/operators/fusion_conv_add.h | 11 +- src/operators/fusion_conv_add_relu_op.cpp | 41 +++ src/operators/fusion_conv_add_relu_op.h | 28 +- src/operators/fusion_fc_op.h | 2 +- src/operators/kernel/arm/conv_add_kernel.cpp | 4 +- .../kernel/arm/conv_add_relu_kernel.cpp | 117 ++++++ src/operators/kernel/conv_add_kernel.h | 69 +--- src/operators/kernel/conv_add_relu_kernel.h | 43 +++ .../kernel/mali/batchnorm_kernel.cpp | 31 ++ src/operators/kernel/mali/conv_kernel.cpp | 1 + src/operators/math/conv_func.h | 103 ++++++ src/operators/math/gemm.cpp | 338 +++++++++++++++++- src/operators/math/gemm.h | 6 + src/operators/math/math_function.cpp | 16 +- src/operators/math/math_function.h | 2 +- src/operators/op_param.h | 12 +- test/CMakeLists.txt | 4 + test/executor_for_test.h | 8 +- test/framework/test_load.cpp | 7 +- test/operators/test_conv_add_relu_op.cpp | 44 +++ test/operators/test_cov_op.cpp | 8 +- 27 files changed, 811 insertions(+), 113 deletions(-) create mode 100644 src/operators/kernel/arm/conv_add_relu_kernel.cpp create mode 100644 src/operators/kernel/conv_add_relu_kernel.h create mode 100644 src/operators/kernel/mali/batchnorm_kernel.cpp create mode 100644 src/operators/math/conv_func.h create mode 100644 test/operators/test_conv_add_relu_op.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1007e46bf5..9c3ff01e5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,9 +12,13 @@ option(FPGA "fpga" OFF) if (CPU) add_definitions(-DPADDLE_MOBILE_CPU) -elseif (MALI_GPU) +endif() + +if (MALI_GPU) add_definitions(-DPADDLE_MOBILE_MALI_GPU) -elseif(FPGA) +endif() + +if(FPGA) add_definitions(-DPADDLE_MOBILE_FPGA) endif() @@ -94,6 +98,7 @@ if (googlenet) add_definitions(-DPOOL_OP) add_definitions(-DRELU_OP) add_definitions(-DFUSION_CONVADD_OP) + add_definitions(-DFUSION_CONVADD_RELU_OP) elseif (mobilenet) add_definitions(-DCONV_OP) add_definitions(-DELEMENTWISEADD_OP) @@ -145,6 +150,7 @@ else () add_definitions(-DSIGMOID_OP) add_definitions(-DSOFTMAX_OP) add_definitions(-DTRANSPOSE_OP) + add_definitions(-DFUSION_CONVADD_RELU_OP) endif() if (IS_IOS) diff --git a/src/common/types.cpp b/src/common/types.cpp index 2193b440a6..a6f32762d3 100644 --- a/src/common/types.cpp +++ b/src/common/types.cpp @@ -63,6 +63,7 @@ std::unordered_map< {G_OP_TYPE_MULTICLASS_NMS, {{"BBoxes", "Scores"}, {"Out"}}}, {G_OP_TYPE_FC, {{"X", "Y", "Z"}, {"Out"}}}, {G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}}, - {G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}}}; + {G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}}, + {G_OP_TYPE_FUSION_CONV_ADD_RELU, {{"Input"}, {"Out"}}}}; } // namespace paddle_mobile diff --git a/src/operators/conv_op.cpp b/src/operators/conv_op.cpp index b4910eb26e..824ab9ee31 100644 --- a/src/operators/conv_op.cpp +++ b/src/operators/conv_op.cpp @@ -17,6 +17,7 @@ limitations under the License. */ #include "operators/conv_op.h" #include #include "framework/op_proto_maker.h" +#include "operators/math/conv_func.h" #include "framework/op_registry.h" namespace paddle_mobile { @@ -38,7 +39,7 @@ void ConvOp::InferShape() const { std::vector output_shape({in_dims[0], filter_dims[0]}); for (size_t i = 0; i < strides.size(); ++i) { - output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], + output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], paddings[i], strides[i])); } diff --git a/src/operators/conv_op.h b/src/operators/conv_op.h index fe723c574e..f8e8952d47 100644 --- a/src/operators/conv_op.h +++ b/src/operators/conv_op.h @@ -43,13 +43,6 @@ class ConvOp private: }; -inline int ConvOutputSize(int input_size, int filter_size, int dilation, - int padding, int stride) { - const int dkernel = dilation * (filter_size - 1) + 1; - int output_size = (input_size + 2 * padding - dkernel) / stride + 1; - return output_size; -} - } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/depthwise_conv_op.cpp b/src/operators/depthwise_conv_op.cpp index be3b9a0ca8..b127424bce 100644 --- a/src/operators/depthwise_conv_op.cpp +++ b/src/operators/depthwise_conv_op.cpp @@ -17,6 +17,7 @@ limitations under the License. */ #include "operators/depthwise_conv_op.h" #include #include "framework/op_proto_maker.h" +#include "operators/math/conv_func.h" #include "framework/op_registry.h" #include "operators/conv_op.h" @@ -39,7 +40,7 @@ void DepthwiseConvOp::InferShape() const { std::vector output_shape({in_dims[0], filter_dims[0]}); for (size_t i = 0; i < strides.size(); ++i) { - output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], + output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], paddings[i], strides[i])); } diff --git a/src/operators/fusion_conv_add.cpp b/src/operators/fusion_conv_add.cpp index 1f98cbdd8e..c8519a7e00 100644 --- a/src/operators/fusion_conv_add.cpp +++ b/src/operators/fusion_conv_add.cpp @@ -14,6 +14,7 @@ limitations under the License. */ #ifdef FUSION_CONVADD_OP +#include "operators/math/conv_func.h" #include "operators/fusion_conv_add.h" namespace paddle_mobile { @@ -35,7 +36,7 @@ void FushionConvAddOp::InferShape() const { std::vector output_shape({in_dims[0], filter_dims[0]}); for (size_t i = 0; i < strides.size(); ++i) { - output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], + output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], paddings[i], strides[i])); } diff --git a/src/operators/fusion_conv_add.h b/src/operators/fusion_conv_add.h index 56ce40a53f..1c4c898a8e 100644 --- a/src/operators/fusion_conv_add.h +++ b/src/operators/fusion_conv_add.h @@ -18,10 +18,10 @@ limitations under the License. */ #include #include -#include "framework/operator.h" -#include "framework/program/program-optimize/fusion_op_register.h" #include "op_param.h" +#include "framework/operator.h" #include "operators/kernel/conv_add_kernel.h" +#include "framework/program/program-optimize/fusion_op_register.h" namespace paddle_mobile { namespace operators { @@ -67,13 +67,6 @@ class FushionConvAddOp : public framework::OperatorWithKernel< protected: }; -inline int ConvOutputSize(int input_size, int filter_size, int dilation, - int padding, int stride) { - const int dkernel = dilation * (filter_size - 1) + 1; - int output_size = (input_size + 2 * padding - dkernel) / stride + 1; - return output_size; -} - #ifdef PADDLE_MOBILE_CPU static framework::FusionOpRegistrar convadd_registrar( new FusionConvAddMatcher()); diff --git a/src/operators/fusion_conv_add_relu_op.cpp b/src/operators/fusion_conv_add_relu_op.cpp index bf33db7d78..e7f18a7199 100644 --- a/src/operators/fusion_conv_add_relu_op.cpp +++ b/src/operators/fusion_conv_add_relu_op.cpp @@ -15,5 +15,46 @@ limitations under the License. */ #ifdef CONVADDRELU_OP #include "fusion_conv_add_relu_op.h" +#include "operators/math/conv_func.h" + +namespace paddle_mobile { +namespace operators { + +template +void FusionConvAddReluOp::InferShape() const { + auto in_dims = this->param_.Input()->dims(); + auto filter_dims = this->param_.Filter()->dims(); + const std::vector &strides = this->param_.Strides(); + std::vector paddings = this->param_.Paddings(); + int groups = this->param_.Groups(); + std::vector dilations = this->param_.Dilations(); + + PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() && + dilations.size() == paddings.size() && + paddings.size() == strides.size()), + "ConvParam is not suitable"); + + std::vector output_shape({in_dims[0], filter_dims[0]}); + for (size_t i = 0; i < strides.size(); ++i) { + output_shape.push_back(math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], + dilations[i], paddings[i], + strides[i])); + } + framework::DDim ddim = framework::make_ddim(output_shape); + this->param_.Output()->Resize(ddim); +} + +} +} + +namespace ops = paddle_mobile::operators; +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(fusion_conv_add_relu); +REGISTER_OPERATOR_CPU(fusion_conv_add_relu, ops::FusionConvAddReluOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h index eb8dd205b4..c5b13d23fb 100644 --- a/src/operators/fusion_conv_add_relu_op.h +++ b/src/operators/fusion_conv_add_relu_op.h @@ -17,6 +17,8 @@ limitations under the License. */ #pragma once #include "framework/operator.h" +#include "operators/op_param.h" +#include "operators/kernel/conv_add_relu_kernel.h" #include "framework/program/program-optimize/fusion_op_register.h" namespace paddle_mobile { @@ -33,22 +35,34 @@ class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher { void FolderNodes( framework::Node *node, std::vector> *removed_nodes) { - std::vector> origin_descs = - node->OpDescs(node_.Depth()); node->Folder(node_.Depth(), Type(), - {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}}, removed_nodes); + {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Y"}}}, removed_nodes); } std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_RELU; } }; -class ConvAddReluOp { +template +class FusionConvAddReluOp: public framework::OperatorWithKernel< + DeviceType, FushionConvAddReluParam, + operators::ConvAddReluKernel> { public: - private: + FusionConvAddReluOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel>( + type, inputs, outputs, attrs, scope) {} + + using framework::OperatorWithKernel< + DeviceType, FushionConvAddReluParam, + operators::ConvAddReluKernel>::OperatorWithKernel; + void InferShape() const override; + protected: }; #ifdef PADDLE_MOBILE_CPU -// static framework::FusionOpRegistrar fusion_conv_add_relu_registrar( -// new FushionConvAddReluOpMatcher()); +//static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new FushionConvAddReluOpMatcher()); #endif #ifdef PADDLE_MOBILE_MALI_GPU #endif diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h index fbdad1e79c..b1c255d7ab 100644 --- a/src/operators/fusion_fc_op.h +++ b/src/operators/fusion_fc_op.h @@ -70,7 +70,7 @@ class FushionFcOp : public framework::OperatorWithKernel< static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); #endif #ifdef PADDLE_MOBILE_MALI_GPU -static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); +//static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); #endif #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/kernel/arm/conv_add_kernel.cpp b/src/operators/kernel/arm/conv_add_kernel.cpp index 28874bc64c..879cc4a266 100644 --- a/src/operators/kernel/arm/conv_add_kernel.cpp +++ b/src/operators/kernel/arm/conv_add_kernel.cpp @@ -26,7 +26,7 @@ void ConvAddKernel::Compute( Tensor bias = *param.Bias(); int axis = param.Axis(); Tensor *output = param.Output(); - expand_bias(bias, axis, output->dims()); + math::expand_bias(bias, axis, output->dims()); output->ShareDataWith(bias); int groups = param.Groups(); std::vector strides = param.Strides(); @@ -50,7 +50,7 @@ void ConvAddKernel::Compute( framework::DDim col_matrix_shape = framework::flatten_to_2d(col_shape, data_dim + 1); - bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations); + bool is_expand = math::IsExpand(filter_shape_vec, strides, paddings, dilations); Tensor col; Tensor col_matrix; if (is_expand) { diff --git a/src/operators/kernel/arm/conv_add_relu_kernel.cpp b/src/operators/kernel/arm/conv_add_relu_kernel.cpp new file mode 100644 index 0000000000..4843c9aa5d --- /dev/null +++ b/src/operators/kernel/arm/conv_add_relu_kernel.cpp @@ -0,0 +1,117 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_CONVADD_RELU_OP + +#include "operators/kernel/conv_add_relu_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +void ConvAddReluKernel::Compute( + const FushionConvAddReluParam ¶m) const { + const Tensor *input = param.Input(); + Tensor filter = *param.Filter(); + Tensor bias = *param.Bias(); + int axis = param.Axis(); + Tensor *output = param.Output(); + math::expand_bias(bias, axis, output->dims()); + output->ShareDataWith(bias); + int groups = param.Groups(); + std::vector strides = param.Strides(); + std::vector paddings = param.Paddings(); + std::vector dilations = param.Dilations(); + + const int batch_size = static_cast(input->dims()[0]); + + std::vector filter_shape_vec(framework::vectorize(filter.dims())); + + std::vector output_shape_vec(framework::vectorize(output->dims())); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = input->dims()[1] / groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } + framework::DDim col_shape(framework::make_ddim(col_shape_vec)); + + framework::DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, data_dim + 1); + + bool is_expand = math::IsExpand(filter_shape_vec, strides, paddings, dilations); + Tensor col; + Tensor col_matrix; + if (is_expand) { + col.mutable_data(col_shape); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } + + framework::DDim input_shape = framework::slice_ddim( + input->dims(), 1, static_cast(input->dims().size())); + + framework::DDim filter_matrix_shape = {filter.dims()[0], + filter.numel() / filter.dims()[0]}; + filter.Resize(filter_matrix_shape); + framework::DDim output_matrix_shape = { + output->dims()[1], + output->numel() / (output->dims()[0] * output->dims()[1])}; + + // convolution operator: im2col(or vol2col) + gemm + int in_step = static_cast(input->dims()[1]) / groups; + int out_step = static_cast(output->dims()[1]) / groups; + + math::Vol2ColFunctor vol2col; + math::Im2ColFunctor im2col; + + for (int i = 0; i < batch_size; i++) { + Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); + Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); + + for (int g = 0; g < groups; g++) { + Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); + + if (!is_expand) { + col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (data_dim == 2U) { + // im2col + im2col(in_slice, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); + } else if (data_dim == 3U) { + // vol2col + vol2col(in_slice, dilations, strides, paddings, &col); + } + + // gemm + Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); + Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); + math::matmul(filter_slice, false, col_matrix, false, + static_cast(1), &out_slice, + static_cast(1), true); + + } + } +} +template class ConvAddReluKernel; + +} +} + +#endif diff --git a/src/operators/kernel/conv_add_kernel.h b/src/operators/kernel/conv_add_kernel.h index 39f13e0f44..2eea496602 100644 --- a/src/operators/kernel/conv_add_kernel.h +++ b/src/operators/kernel/conv_add_kernel.h @@ -21,11 +21,12 @@ limitations under the License. */ #include #endif #include "framework/ddim.h" +#include "operators/op_param.h" #include "framework/operator.h" #include "operators/math/im2col.h" -#include "operators/math/math_function.h" #include "operators/math/vol2col.h" -#include "operators/op_param.h" +#include "operators/math/conv_func.h" +#include "operators/math/math_function.h" namespace paddle_mobile { namespace operators { @@ -33,75 +34,13 @@ namespace operators { using framework::DDim; using framework::OpKernelBase; + template class ConvAddKernel : public OpKernelBase { public: void Compute(const FushionConvAddParam ¶m) const; }; -inline void expand_bias(Tensor &bias, int axis, const DDim &dDim) { - auto bias_ptr = bias.data(); - const DDim bias_ddim = bias.dims(); - PADDLE_MOBILE_ENFORCE(bias.dims().size() == 1, - "the bias tensor's dims size != 1") - DDim outer_ddim = paddle_mobile::framework::slice_ddim(dDim, 0, axis + 1); - DDim inner_ddim = - paddle_mobile::framework::slice_ddim(dDim, axis + 1, dDim.size()); - int outer_size = paddle_mobile::framework::product(outer_ddim); - int inner_size = paddle_mobile::framework::product(inner_ddim); - bias.Resize(dDim); - auto new_ptr = bias.mutable_data(); - int axis_size = dDim[axis]; - -#if __ARM_NEON - for (int i = 0; i < outer_size; ++i) { - int inner_num = inner_size >> 4; - int remain = inner_size - (inner_num << 4); - float v_bias = bias_ptr[i * axis_size / outer_size]; - for (; inner_num > 0; inner_num--) { - float32x4_t v_newptr1 = vdupq_n_f32(v_bias); - float32x4_t v_newptr2 = vdupq_n_f32(v_bias); - float32x4_t v_newptr3 = vdupq_n_f32(v_bias); - float32x4_t v_newptr4 = vdupq_n_f32(v_bias); - vst1q_f32(new_ptr, v_newptr1); - new_ptr += 4; - vst1q_f32(new_ptr, v_newptr2); - new_ptr += 4; - vst1q_f32(new_ptr, v_newptr3); - new_ptr += 4; - vst1q_f32(new_ptr, v_newptr4); - new_ptr += 4; - } - for (; remain > 0; remain--) { - *new_ptr = v_bias; - new_ptr++; - } - } -#else - for (int i = 0; i < outer_size; ++i) { - float v_bias = bias_ptr[i * axis_size / outer_size]; - for (int j = 0; j < inner_size; ++j) { - new_ptr[i * inner_size + j] = v_bias; - } - } -#endif -} - -inline bool IsExpand(const std::vector &filter_dim, - const std::vector &strides, - const std::vector &paddings, - const std::vector &dilations) { - bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; - for (size_t j = 0; j < strides.size(); ++j) { - filter_1 = filter_1 && (static_cast(filter_dim[j + 2]) == 1); - strides_1 = strides_1 && (strides[j] == 1); - padding_0 = padding_0 && (paddings[j] == 0); - dilation_1 = dilation_1 && (dilations[j] == 1); - } - - return !(filter_1 && strides_1 && padding_0 && dilation_1); -} - } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/conv_add_relu_kernel.h b/src/operators/kernel/conv_add_relu_kernel.h new file mode 100644 index 0000000000..d99f90def1 --- /dev/null +++ b/src/operators/kernel/conv_add_relu_kernel.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifdef FUSION_CONVADD_RELU_OP + +#include +#include "framework/ddim.h" +#include "framework/operator.h" +#include "operators/op_param.h" +#include "operators/math/im2col.h" +#include "operators/math/vol2col.h" +#include "operators/math/conv_func.h" +#include "operators/math/math_function.h" + +namespace paddle_mobile { +namespace operators { + +using framework::DDim; +using framework::OpKernelBase; + +template +class ConvAddReluKernel : public OpKernelBase { + public: + void Compute(const FushionConvAddReluParam ¶m) const; +}; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/mali/batchnorm_kernel.cpp b/src/operators/kernel/mali/batchnorm_kernel.cpp new file mode 100644 index 0000000000..f1bb29575a --- /dev/null +++ b/src/operators/kernel/mali/batchnorm_kernel.cpp @@ -0,0 +1,31 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef BATCHNORM_OP + +#pragma once + +#include "operators/kernel/batchnorm_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +void BatchNormKernel::Compute(const BatchNormParam ¶m) const { +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/mali/conv_kernel.cpp b/src/operators/kernel/mali/conv_kernel.cpp index 6c140145e9..c6bb6306b7 100644 --- a/src/operators/kernel/mali/conv_kernel.cpp +++ b/src/operators/kernel/mali/conv_kernel.cpp @@ -23,6 +23,7 @@ template <> void ConvKernel::Compute(const ConvParam ¶m) const { // ArmConvImplement imp; // imp.Compute(param); + param.Output()->mutable_data()[0] = 100.0; } template class ConvKernel; diff --git a/src/operators/math/conv_func.h b/src/operators/math/conv_func.h new file mode 100644 index 0000000000..e6af2172fc --- /dev/null +++ b/src/operators/math/conv_func.h @@ -0,0 +1,103 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#if __ARM_NEON +#include +#endif + +#include "framework/ddim.h" +#include "framework/tensor.h" + +namespace paddle_mobile { +namespace operators { +namespace math { + +using framework::DDim; +using framework::Tensor; + +inline int ConvOutputSize(int input_size, int filter_size, int dilation, + int padding, int stride) { + const int dkernel = dilation * (filter_size - 1) + 1; + int output_size = (input_size + 2 * padding - dkernel) / stride + 1; + return output_size; +} + +inline void expand_bias(Tensor &bias, int axis, const DDim &dDim) { + auto bias_ptr = bias.data(); + const DDim bias_ddim = bias.dims(); + PADDLE_MOBILE_ENFORCE(bias.dims().size() == 1, + "the bias tensor's dims size != 1") + DDim outer_ddim = paddle_mobile::framework::slice_ddim(dDim, 0, axis + 1); + DDim inner_ddim = + paddle_mobile::framework::slice_ddim(dDim, axis + 1, dDim.size()); + int outer_size = paddle_mobile::framework::product(outer_ddim); + int inner_size = paddle_mobile::framework::product(inner_ddim); + bias.Resize(dDim); + auto new_ptr = bias.mutable_data(); + int axis_size = dDim[axis]; + +#if __ARM_NEON + for (int i = 0; i < outer_size; ++i) { + int inner_num = inner_size >> 4; + int remain = inner_size - (inner_num << 4); + float v_bias = bias_ptr[i * axis_size / outer_size]; + for (; inner_num > 0; inner_num--) { + float32x4_t v_newptr1 = vdupq_n_f32(v_bias); + float32x4_t v_newptr2 = vdupq_n_f32(v_bias); + float32x4_t v_newptr3 = vdupq_n_f32(v_bias); + float32x4_t v_newptr4 = vdupq_n_f32(v_bias); + vst1q_f32(new_ptr, v_newptr1); + new_ptr += 4; + vst1q_f32(new_ptr, v_newptr2); + new_ptr += 4; + vst1q_f32(new_ptr, v_newptr3); + new_ptr += 4; + vst1q_f32(new_ptr, v_newptr4); + new_ptr += 4; + } + for (; remain > 0; remain--) { + *new_ptr = v_bias; + new_ptr++; + } + } +#else + for (int i = 0; i < outer_size; ++i) { + float v_bias = bias_ptr[i * axis_size / outer_size]; + for (int j = 0; j < inner_size; ++j) { + new_ptr[i * inner_size + j] = v_bias; + } + } +#endif +} + +inline bool IsExpand(const std::vector &filter_dim, + const std::vector &strides, + const std::vector &paddings, + const std::vector &dilations) { + bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; + for (size_t j = 0; j < strides.size(); ++j) { + filter_1 = filter_1 && (static_cast(filter_dim[j + 2]) == 1); + strides_1 = strides_1 && (strides[j] == 1); + padding_0 = padding_0 && (paddings[j] == 0); + dilation_1 = dilation_1 && (dilations[j] == 1); + } + + return !(filter_1 && strides_1 && padding_0 && dilation_1); +} + +} +} +} diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp index d69ae00d4a..c19fdfd57b 100644 --- a/src/operators/math/gemm.cpp +++ b/src/operators/math/gemm.cpp @@ -175,7 +175,48 @@ void InnerKernel(int m, int n, int k, float alpha, const float *A, int lda, } } -// 计算一个更小的 4 * 4 的 C 矩阵分块 +// 分块矩阵乘法 +void InnerKernel_relu(int m, int n, int k, float alpha, const float *A, int lda, + const float *B, int ldb, float beta, float *C, int ldc, + int first_time, bool relu = false) { + int Buff_A_M = m; + int Buff_B_N = n; + + int _mc = m % MR; + int _nc = n % NR; + + if (_mc != 0) { + Buff_A_M = m + (MR - _mc); + } + + if (_nc != 0) { + Buff_B_N = n + (NR - _nc); + } + + float packedA[MC * KC]; + static float packedB[KC * NC]; + + if (first_time) { + PackMatrixB_(k, n, _nc, B, ldb, packedB); + } + PackMatrixA_(m, k, _mc, A, lda, packedA); + + int i, j, mc, nc; + + // B 取 4 列, 打包预热 + for (j = 0; j < Buff_B_N; j += NR) { + nc = (n - j) < NR ? _nc : NR; + // A 取 4 行,打包预热 + for (i = 0; i < Buff_A_M; i += MR) { + mc = (m - i) < MR ? _mc : MR; + AddDot4x4_relu(k, alpha, &packedA[i * k], 4, &packedB[j * k], k, beta, + &C(i, j), ldc, mc, nc, relu); + } + } +} + + +//计算一个更小的 4 * 4 的 C 矩阵分块 #if defined(IOS) void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, int ldb, float beta, float *C, int ldc, int mc, int nc) { @@ -226,6 +267,60 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, } } } + +void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b, + int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu = false) { + // init C + float32x4_t cv0 = vdupq_n_f32(0.0); + float32x4_t cv1 = vdupq_n_f32(0.0); + float32x4_t cv2 = vdupq_n_f32(0.0); + float32x4_t cv3 = vdupq_n_f32(0.0); + + float32x4_t av; + float32x4_t bv; + + float32x2_t av01; + float32x2_t av23; + + for (int p = 0; p < k; p += 1) { + av = vld1q_f32(a); + bv = vld1q_f32(b); + + av01 = vget_low_f32(av); + cv0 = vmlaq_lane_f32(cv0, bv, av01, 0); + cv1 = vmlaq_lane_f32(cv1, bv, av01, 1); + av23 = vget_high_f32(av); + cv2 = vmlaq_lane_f32(cv2, bv, av23, 0); + cv3 = vmlaq_lane_f32(cv3, bv, av23, 1); + + a += MR; + b += NR; + } + float32x4x4_t cv = {cv0, cv1, cv2, cv3}; + int i, j; + for (i = 0; i < mc; ++i) { + for (j = 0; j < nc; ++j) { + if (beta == 0.0) { + C(i, j) = 0.0; + } else if (beta != 1.0) { + C(i, j) *= beta; + } + if (j == 0) { + C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 0); + } else if (j == 1) { + C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 1); + } else if (j == 2) { + C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 2); + } else if (j == 3) { + C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 3); + } + if (C(i, j) < 0) { + C(i, j) = 0; + } + } + } +} + #elif defined(ARMV7) void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, int ldb, float beta, float *C, int ldc, int mc, int nc) { @@ -361,6 +456,155 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, } } } + +void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b, + int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu = false) { + int kc1 = k / 2, kc2 = k % 2; + int bytes_ldc = 4 * ldc; + int flag_alpha = (alpha == 1.0) ? 1 : 2; + int flag_beta; + if (beta == 0.0) { + flag_beta = 0; + } else if (beta == 1.0) { + flag_beta = 1; + } else { + flag_beta = 2; + } + + asm volatile( + "vmov.f32 q10, #0.0 \n\t" + "vmov.f32 q11, #0.0 \n\t" + "vmov.f32 q12, #0.0 \n\t" + "vmov.f32 q13, #0.0 \n\t" + "vmov.f32 q14, #0.0 \n\t" + "subs %[kc1], %[kc1], #1 \n\t" + "blt end_kc1_%= \n\t" + "loop_kc1_%=: \n\t" + "vld1.32 {q0, q1}, [%[a]]! \n\t" + "vld1.32 {q2, q3}, [%[b]]! \n\t" + "vmla.f32 q10, q2, d0[0] \n\t" + "vmla.f32 q11, q2, d0[1] \n\t" + "vmla.f32 q12, q2, d1[0] \n\t" + "vmla.f32 q13, q2, d1[1] \n\t" + "vmla.f32 q10, q3, d2[0] \n\t" + "vmla.f32 q11, q3, d2[1] \n\t" + "vmla.f32 q12, q3, d3[0] \n\t" + "vmla.f32 q13, q3, d3[1] \n\t" + "subs %[kc1], %[kc1], #1 \n\t" + "bge loop_kc1_%= \n\t" + "end_kc1_%=: \n\t" + + "subs %[kc2], %[kc2], #1 \n\t" + "blt end_kc2_%= \n\t" + "vld1.32 {q0}, [%[a]]! \n\t" + "vld1.32 {q1}, [%[b]]! \n\t" + "vmla.f32 q10, q1, d0[0] \n\t" + "vmla.f32 q11, q1, d0[1] \n\t" + "vmla.f32 q12, q1, d1[0] \n\t" + "vmla.f32 q13, q1, d1[1] \n\t" + "end_kc2_%=: \n\t" + + "cmp %[mc], #4 \n\t" + "bne temp_%= \n\t" + "cmp %[nc], #4 \n\t" + "bne temp_%= \n\t" + + "vmov.f32 d8[0], %[alpha] \n\t" + "vmov.f32 d8[1], %[beta] \n\t" + + "cmp %[flag_alpha], #1 \n\t" + "bne alpha_%= \n\t" + + "alpha_%=: \n\t" + "vmul.f32 q10, q10, d8[0] \n\t" + "vmul.f32 q11, q11, d8[0] \n\t" + "vmul.f32 q12, q12, d8[0] \n\t" + "vmul.f32 q13, q13, d8[0] \n\t" + + "beta_%=: \n\t" + "cmp %[flag_beta], #0 \n\t" + "beq memory_%= \n\t" + + "mov r4, %[C] \n\t" + "mov r6, %[bytes_ldc]\n\t" + "vld1.32 {q0}, [r4], r6 \n\t" + "vld1.32 {q1}, [r4], r6 \n\t" + "vld1.32 {q2}, [r4], r6 \n\t" + "vld1.32 {q3}, [r4] \n\t" + "cmp %[flag_beta], #1 \n\t" + "beq beta_eq1_%= \n\t" + "bne beta_ne1_%= \n\t" + + "beta_eq1_%=: \n\t" + "vadd.f32 q10, q10, q0 \n\t" + "vadd.f32 q11, q11, q1 \n\t" + "vadd.f32 q12, q12, q2 \n\t" + "vadd.f32 q13, q13, q3 \n\t" + "b memory_%= \n\t" + + "beta_ne1_%=: \n\t" + "vmla.f32 q10, q0, d8[1] \n\t" + "vmla.f32 q11, q1, d8[1] \n\t" + "vmla.f32 q12, q2, d8[1] \n\t" + "vmla.f32 q13, q3, d8[1] \n\t" + + "memory_%=: \n\t" + "vmax.f32 q10, q10, q14 \n\t" + "vmax.f32 q11, q11, q14 \n\t" + "vmax.f32 q12, q12, q14 \n\t" + "vmax.f32 q13, q13, q14 \n\t" + "mov r5, %[C] \n\t" + "mov r6, %[bytes_ldc]\n\t" + + "vst1.32 {q10}, [r5], r6 \n\t" + "vst1.32 {q11}, [r5], r6 \n\t" + "vst1.32 {q12}, [r5], r6 \n\t" + "vst1.32 {q13}, [r5] \n\t" + "b end_%= \n\t" + + "temp_%=: \n\t" + "vst1.32 {q10, q11}, [%[ab]]!\n\t" + "vst1.32 {q12, q13}, [%[ab]] \n\t" + "end_%=: \n\t" + : + : [a] "r"(a), [b] "r"(b), [C] "r"(C), [ab] "r"(ab), [kc1] "r"(kc1), + [kc2] "r"(kc2), [mc] "r"(mc), [nc] "r"(nc), [alpha] "r"(alpha), + [beta] "r"(beta), [bytes_ldc] "r"(bytes_ldc), + [flag_alpha] "r"(flag_alpha), [flag_beta] "r"(flag_beta) + : "memory", "q0", "q1", "q2", "q3", "q4", "q10", "q11", "q12", "q13", "q14"); + + + if (mc != MR || nc != NR) { + int i, j; + for (i = 0; i < mc; ++i) { + for (j = 0; j < nc; ++j) { + if (beta == 0.0) { + if (alpha != 1.0) { + C(i, j) = alpha * ab[i * MR + j]; + } else { + C(i, j) = ab[i * MR + j]; + } + } else { + if (beta != 1.0) { + C(i, j) *= beta; + } + if (alpha != 1.0) { + C(i, j) += alpha * ab[i * MR + j]; + } else { + C(i, j) += ab[i * MR + j]; + } + } + if (relu) { + if (C(i, j) < 0) { + C(i, j) = 0; + } + } + + } + } + } +} + #else void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, int ldb, float beta, float *C, int ldc, int mc, int nc) { @@ -418,6 +662,70 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, } } } + +void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b, + int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu) { + float c[16] = {0}; + float reg_a0, reg_a1, reg_a2, reg_a3, reg_b0, reg_b1, reg_b2, reg_b3; + + for (int p = 0; p < k; p += 1) { + reg_b0 = *b++; + reg_b1 = *b++; + reg_b2 = *b++; + reg_b3 = *b++; + + reg_a0 = *a++; + reg_a1 = *a++; + reg_a2 = *a++; + reg_a3 = *a++; + + // first row + c[0] += reg_a0 * reg_b0; + c[1] += reg_a0 * reg_b1; + c[2] += reg_a0 * reg_b2; + c[3] += reg_a0 * reg_b3; + + // second row + c[4] += reg_a1 * reg_b0; + c[5] += reg_a1 * reg_b1; + c[6] += reg_a1 * reg_b2; + c[7] += reg_a1 * reg_b3; + + // third row + c[8] += reg_a2 * reg_b0; + c[9] += reg_a2 * reg_b1; + c[10] += reg_a2 * reg_b2; + c[11] += reg_a2 * reg_b3; + + // fourth row + c[12] += reg_a3 * reg_b0; + c[13] += reg_a3 * reg_b1; + c[14] += reg_a3 * reg_b2; + c[15] += reg_a3 * reg_b3; + } + int i, j; + for (i = 0; i < mc; ++i) { + for (j = 0; j < nc; ++j) { + if (beta == 0.0) { + C(i, j) = 0.0; + } else if (beta != 1.0) { + C(i, j) *= beta; + } + if (alpha != 1.0) { + C(i, j) += alpha * c[i * MR + j]; + } else { + C(i, j) += c[i * MR + j]; + } + if (relu) { + if (C(i, j) < 0) { + C(i, j) = 0; + } + } + } + } +} + + #endif // 32位 float 矩阵乘法 @@ -443,6 +751,34 @@ void sgemm(int m, int n, int k, float alpha, const float *A, int lda, } } +void sgemm_relu(int m, int n, int k, float alpha, const float *A, int lda, + const float *B, int ldb, float beta, float *C, int ldc) { + int i, j, p, mc, nc, kc; + float beta_; + for (j = 0; j < n; j += NC) { + nc = s_min(n - j, NC); + for (p = 0; p < k; p += KC) { + kc = s_min(k - p, KC); + for (i = 0; i < m; i += MC) { + mc = s_min(m - i, MC); + if (p != 0) { + beta_ = 1.0; + } else { + beta_ = beta; + } + + if (p + KC >= k) { + InnerKernel_relu(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb, beta_, + &C(i, j), ldc, i == 0, true); + } else { + InnerKernel(mc, nc, kc, alpha, &A(i, p), lda, &B(p, j), ldb, beta_, + &C(i, j), ldc, i == 0); + } + } + } + } +} + } // namespace math } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/math/gemm.h b/src/operators/math/gemm.h index e510f4cdc9..3ac51765bf 100644 --- a/src/operators/math/gemm.h +++ b/src/operators/math/gemm.h @@ -57,10 +57,16 @@ void InnerKernel(int m, int n, int k, float alpha, const float *A, int lda, void AddDot4x4(int k, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc, int mc, int nc); +void AddDot4x4_relu(int k, float alpha, const float *a, int lda, const float *b, + int ldb, float beta, float *C, int ldc, int mc, int nc, bool relu); + // 32位 float 矩阵乘法 void sgemm(int m, int n, int k, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc); +void sgemm_relu(int m, int n, int k, float alpha, const float *A, int lda, + const float *B, int ldb, float beta, float *C, int ldc); + // 64位 double 矩阵乘法 void dgemm(int m, int n, int k, float alpha, const double *A, int lda, const double *B, int ldb, float beta, double *C, int ldc); diff --git a/src/operators/math/math_function.cpp b/src/operators/math/math_function.cpp index 59dd3e82d9..89faf217e9 100644 --- a/src/operators/math/math_function.cpp +++ b/src/operators/math/math_function.cpp @@ -22,7 +22,7 @@ namespace math { template <> void matmul(const framework::Tensor &matrix_a, bool trans_a, const framework::Tensor &matrix_b, bool trans_b, float alpha, - framework::Tensor *matrix_out, float beta) { + framework::Tensor *matrix_out, float beta, bool relu) { auto dim_a = matrix_a.dims(); auto dim_b = matrix_b.dims(); auto dim_out = matrix_out->dims(); @@ -41,14 +41,20 @@ void matmul(const framework::Tensor &matrix_a, bool trans_a, int N = dim_out[1]; int K = (trans_a == false) ? dim_a[1] : dim_a[0]; - sgemm(M, N, K, alpha, matrix_a.data(), K, matrix_b.data(), N, - beta, matrix_out->data(), N); + if (relu) { + sgemm_relu(M, N, K, alpha, matrix_a.data(), K, matrix_b.data(), N, + beta, matrix_out->data(), N); + } else { + sgemm(M, N, K, alpha, matrix_a.data(), K, matrix_b.data(), N, + beta, matrix_out->data(), N); + } + } template <> void matmul(const framework::Tensor &matrix_a, bool trans_a, const framework::Tensor &matrix_b, bool trans_b, - double alpha, framework::Tensor *matrix_out, double beta) { + double alpha, framework::Tensor *matrix_out, double beta, bool relu) { auto dim_a = matrix_a.dims(); auto dim_b = matrix_b.dims(); auto dim_out = matrix_out->dims(); @@ -68,6 +74,8 @@ void matmul(const framework::Tensor &matrix_a, bool trans_a, int K = (trans_a == false) ? dim_a[1] : dim_a[0]; } + + } // namespace math } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/math/math_function.h b/src/operators/math/math_function.h index bf81fc88a0..0b953ec6a3 100644 --- a/src/operators/math/math_function.h +++ b/src/operators/math/math_function.h @@ -25,7 +25,7 @@ namespace math { template void matmul(const framework::Tensor &matrix_a, bool trans_a, const framework::Tensor &matrix_b, bool trans_b, T alpha, - framework::Tensor *matrix_out, T beta); + framework::Tensor *matrix_out, T beta, bool relu = false); } // namespace math } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/op_param.h b/src/operators/op_param.h index bfefaeaa25..6bf2b41a09 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -823,7 +823,7 @@ class FushionConvAddParam : public OpParam { const int &Groups() const { return groups; } - private: + protected: Tensor *bias_; int axis_; Tensor *input_; @@ -838,5 +838,15 @@ class FushionConvAddParam : public OpParam { Print &operator<<(Print &printer, const FushionConvAddParam &conv_param); #endif +#ifdef FUSION_CONVADD_RELU_OP +class FushionConvAddReluParam: public FushionConvAddParam { + public: + FushionConvAddReluParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs, + const Scope &scope): FushionConvAddParam(inputs, outputs, attrs, scope) { + } +}; +#endif + } // namespace operators } // namespace paddle_mobile diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index cc707ded7f..c6b5fa94cd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -141,6 +141,10 @@ else () ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-mobilenet paddle-mobile) + # gen test + ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test-conv-add-relu-op paddle-mobile) + #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp) endif() diff --git a/test/executor_for_test.h b/test/executor_for_test.h index f8c333779b..1c47410a0b 100644 --- a/test/executor_for_test.h +++ b/test/executor_for_test.h @@ -42,8 +42,9 @@ using std::vector; template class Executor4Test : public Executor { public: - Executor4Test(Program p, string op_type) + Executor4Test(Program p, string op_type, bool use_optimize = false) : Executor() { + this->use_optimize_ = use_optimize; this->program_ = p; if (this->use_optimize_) { this->to_predict_program_ = this->program_.optimizeProgram; @@ -61,10 +62,13 @@ class Executor4Test : public Executor { std::vector> ops = block_desc->Ops(); for (std::shared_ptr op : ops) { if (op->Type() == op_type) { + + DLOG << "匹配到: " << op->Type(); + /// test first meeting op in program std::shared_ptr> op_ptr = paddle_mobile::framework::OpRegistry< - paddle_mobile::CPU>::CreateOp(op->Type(), op->GetInputs(), + DeviceType>::CreateOp(op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(), this->program_.scope); diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp index 65d29345d5..3128fd41ba 100644 --- a/test/framework/test_load.cpp +++ b/test/framework/test_load.cpp @@ -20,8 +20,9 @@ int main() { // ../../../test/models/googlenet // ../../../test/models/mobilenet auto program = loader.Load(g_mobilenet_ssd, false, false); - // loader.Load(g_googlenet_combine + "/model", g_googlenet_combine + - // "/params", true); - program.originProgram->Description("program desc: "); +// auto program = loader.Load(g_googlenet_combine + "/model", g_googlenet_combine + +// "/params", true); + + // program.originProgram->Description("program desc: "); return 0; } diff --git a/test/operators/test_conv_add_relu_op.cpp b/test/operators/test_conv_add_relu_op.cpp new file mode 100644 index 0000000000..6563d7a0b5 --- /dev/null +++ b/test/operators/test_conv_add_relu_op.cpp @@ -0,0 +1,44 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "../test_include.h" +#include "operators/fusion_conv_add_relu_op.h" + +int main() { + paddle_mobile::Loader loader; + // ../models/image_classification_resnet.inference.model + auto program = loader.Load(g_googlenet, true); + + PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, + "program file read fail"); + + Executor4Test> + executor(program, "fusion_conv_add_relu", true); + + paddle_mobile::framework::Tensor input; + GetInput(g_test_image_1x3x224x224, &input, {1, 3, 224, 224}); + // // use SetupTensor if not has local input image . + // SetupTensor(&input, {1, 3, 224, 224}, static_cast(0), + // static_cast(1)); + + auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 112, 112}); + auto output = executor.Predict(input, "data", "conv2d_0.tmp_2", out_ddim); + + auto output_ptr = output->data(); + for (int j = 0; j < 25; ++j) { + DLOG << " value of output: " << output_ptr[j]; + } + return 0; +} diff --git a/test/operators/test_cov_op.cpp b/test/operators/test_cov_op.cpp index 8057430309..3b53a3951a 100644 --- a/test/operators/test_cov_op.cpp +++ b/test/operators/test_cov_op.cpp @@ -16,15 +16,15 @@ limitations under the License. */ #include "operators/conv_op.h" int main() { - paddle_mobile::Loader loader; + paddle_mobile::Loader loader; // ../models/image_classification_resnet.inference.model auto program = loader.Load(g_googlenet); PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, "program file read fail"); - Executor4Test> + Executor4Test> executor(program, "conv2d"); paddle_mobile::framework::Tensor input; @@ -37,7 +37,7 @@ int main() { auto output = executor.Predict(input, "data", "conv2d_0.tmp_0", out_ddim); auto output_ptr = output->data(); - for (int j = 0; j < output->numel(); ++j) { + for (int j = 0; j < 20; ++j) { DLOG << " value of output: " << output_ptr[j]; } return 0; -- GitLab