diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a33db73e109042276b686e8ab74261273df87390..f07b2eeb93daa827361acc97951483c21092135f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -183,6 +183,7 @@ upstream 接下来等待 review,如果有需要修改的地方,参照上述步骤更新 origin 中的对应分支即可。 + ![](http://otkwwi4x8.bkt.clouddn.com/2018-06-20-15294877166787.jpg) 之后就可以提交代码了 @@ -222,6 +223,7 @@ upstream - 原因:如果仅仅修改一个文件但提交了十几个commit,每个commit只做了少量的修改,这会给评审人带来很大困扰。评审人需要逐一查看每个commit才能知道做了哪些修改,且不排除commit之间的修改存在相互覆盖的情况。 - 建议:每次提交时,保持尽量少的commit,可以通过`git commit --amend`补充上次的commit。对已经Push到远程仓库的多个commit,可以参考[squash commits after push](http://stackoverflow.com/questions/5667884/how-to-squash-commits-in-git-after-they-have-been-pushed)。 - 请注意每个commit的名称:应能反映当前commit的内容,不能太随意。 + 3. 如果解决了某个Issue的问题,请在该Pull Request的**第一个**评论框中加上:`fix #issue_number`,这样当该Pull Request被合并后,会自动关闭对应的Issue。关键词包括:close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved,请选择合适的词汇。详细可参考[Closing issues via commit messages](https://help.github.com/articles/closing-issues-via-commit-messages)。 此外,在回复评审人意见时,请您遵守以下约定: diff --git a/doc/design_doc.md b/doc/design_doc.md index bf5f78e8d805465418cad8989945f2afa7ab5587..3407c78443de0f0c7d9ebab848122c2e089e9e41 100644 --- a/doc/design_doc.md +++ b/doc/design_doc.md @@ -3,6 +3,7 @@ #### 以下是 paddle-mobile 代码的执行流程图: + ![执行流程图](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305189473720.png) @@ -14,6 +15,7 @@ 先来看一下模型, 模型分为两种结构: 一种为参数文件是散开的, 如下图, 红框为模型结构的 protobuf 文件, 其余为参数文件 + ![模型描述](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305190629577.png) @@ -21,7 +23,6 @@ ![模型描述combined](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305191057130.png) - loader 模块的作用是将模型结构信息 load 进内存, 将红框内的 protobuf 文件 load 进内存, 并对模型结构进行优化(如将几个细粒度的 op 融合成 粗粒度的 op, 如将 conv、 add、 batchnorm、 relu 融合为 conv\_add\_batchnorm\_relu). 方便进行算法优化. diff --git a/doc/images/devices.png b/doc/images/devices.png new file mode 100644 index 0000000000000000000000000000000000000000..413d32c249972ee96f678d50a5cd0b36a2a03e29 Binary files /dev/null and b/doc/images/devices.png differ diff --git a/doc/images/flow_chart.png b/doc/images/flow_chart.png new file mode 100644 index 0000000000000000000000000000000000000000..c747230da43e2e688d7460704268631758d34596 Binary files /dev/null and b/doc/images/flow_chart.png differ diff --git a/doc/images/model_desc.png b/doc/images/model_desc.png new file mode 100644 index 0000000000000000000000000000000000000000..3c026b6192c8e1d84b3a82c3db91e022f35358c2 Binary files /dev/null and b/doc/images/model_desc.png differ diff --git a/doc/images/model_desc_combined.png b/doc/images/model_desc_combined.png new file mode 100644 index 0000000000000000000000000000000000000000..38e7388efcfdcad53f4e80ce0ac5d3b993eb986c Binary files /dev/null and b/doc/images/model_desc_combined.png differ diff --git a/src/operators/kernel/arm/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/arm/conv_add_bn_relu_kernel.cpp index e95bd8e76c5034f3897eff81e0ba67119d04a95b..1fd1c66d4dc92a9918243b23e400ef5309422050 100644 --- a/src/operators/kernel/arm/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/arm/conv_add_bn_relu_kernel.cpp @@ -22,11 +22,11 @@ namespace operators { template <> bool ConvAddBNReluKernel::Init(FusionConvAddBNReluParam *param) { - const Tensor *mean = (*param).InputMean(); - const Tensor *variance = (*param).InputVariance(); - const Tensor *scale = (*param).InputScale(); - const Tensor *bias = (*param).InputBias(); - const float epsilon = (*param).Epsilon(); + const Tensor *mean = param->InputMean(); + const Tensor *variance = param->InputVariance(); + const Tensor *scale = param->InputScale(); + const Tensor *bias = param->InputBias(); + const float epsilon = param->Epsilon(); auto mean_ptr = mean->data(); auto variance_ptr = variance->data(); @@ -47,8 +47,8 @@ bool ConvAddBNReluKernel::Init(FusionConvAddBNReluParam *param) { new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i]; new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i]; } - (*param).SetNewScale(new_scale); - (*param).SetNewBias(new_bias); + param->SetNewScale(new_scale); + param->SetNewBias(new_bias); return true; } diff --git a/src/operators/kernel/central-arm-func/conv_add_bn_relu_func.h b/src/operators/kernel/central-arm-func/conv_add_bn_relu_func.h index bf96a2d46fd96516743127b71db57496e35b8a77..13fe50bf74ee164c2cc663f5a6a9eeddbfa3804b 100644 --- a/src/operators/kernel/central-arm-func/conv_add_bn_relu_func.h +++ b/src/operators/kernel/central-arm-func/conv_add_bn_relu_func.h @@ -17,11 +17,10 @@ limitations under the License. */ #pragma once #include "operators/math/depthwise_conv_3x3.h" #include "operators/op_param.h" + namespace paddle_mobile { namespace operators { - -template -void ConvAddBNReluCompute(const FusionConvAddBNReluParam ¶m) { +void ConvAddBNReluBasic(const FusionConvAddBNReluParam ¶m) { const Tensor *input = param.Input(); Tensor filter = *param.Filter(); Tensor bias = *param.Bias(); @@ -30,105 +29,122 @@ void ConvAddBNReluCompute(const FusionConvAddBNReluParam ¶m) { auto new_bias_ptr = new_bias.data(); auto new_scale_ptr = new_scale.data(); int axis = param.Axis(); + Tensor *output = param.Output(); + math::expand_bias(bias, axis, output->dims()); + output->ShareDataWith(bias); + int groups = param.Groups(); std::vector strides = param.Strides(); std::vector paddings = param.Paddings(); std::vector dilations = param.Dilations(); - Tensor *output = param.Output(); + + const int batch_size = static_cast(input->dims()[0]); + std::vector filter_shape_vec(framework::vectorize(filter.dims())); - if (filter_shape_vec[2] == 3 && strides[0] == 1 && groups > 1) { - math::DepthwiseConvAddBNRelu3x3s1p1(input, filter, output, &bias, 1, - &new_scale, &new_bias, 1, 1); - } else { - const int batch_size = static_cast(input->dims()[0]); - - math::expand_bias(bias, axis, output->dims()); - output->ShareDataWith(bias); - - std::vector output_shape_vec(framework::vectorize(output->dims())); - size_t data_dim = filter_shape_vec.size() - 2; - std::vector col_shape_vec(1 + 2 * data_dim); - col_shape_vec[0] = input->dims()[1] / groups; - for (size_t j = 0; j < data_dim; ++j) { - col_shape_vec[j + 1] = filter_shape_vec[j + 2]; - col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; - } - framework::DDim col_shape(framework::make_ddim(col_shape_vec)); - - framework::DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, data_dim + 1); - - bool is_expand = - math::IsExpand(filter_shape_vec, strides, paddings, dilations); - Tensor col; - Tensor col_matrix; - if (is_expand) { - col.mutable_data(col_shape); - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); - } + std::vector output_shape_vec(framework::vectorize(output->dims())); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = input->dims()[1] / groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } + framework::DDim col_shape(framework::make_ddim(col_shape_vec)); + + framework::DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, data_dim + 1); + + bool is_expand = + math::IsExpand(filter_shape_vec, strides, paddings, dilations); + Tensor col; + Tensor col_matrix; + if (is_expand) { + col.mutable_data(col_shape); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } - framework::DDim input_shape = framework::slice_ddim( - input->dims(), 1, static_cast(input->dims().size())); - - framework::DDim filter_matrix_shape = {filter.dims()[0], - filter.numel() / filter.dims()[0]}; - filter.Resize(filter_matrix_shape); - framework::DDim output_matrix_shape = { - output->dims()[1], - output->numel() / (output->dims()[0] * output->dims()[1])}; - - // convolution operator: im2col(or vol2col) + gemm - int in_step = static_cast(input->dims()[1]) / groups; - int out_step = static_cast(output->dims()[1]) / groups; - - math::Vol2ColFunctor vol2col; - math::Im2ColFunctor im2col; - - for (int i = 0; i < batch_size; i++) { - Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); - Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); - - for (int g = 0; g < groups; g++) { - Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - - if (!is_expand) { - col.ShareDataWith(in_slice); - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); - } else if (data_dim == 2U) { - // im2col - im2col(in_slice, dilations, strides, - std::vector{paddings[0], paddings[1], paddings[0], - paddings[1]}, - &col); - } else if (data_dim == 3U) { - // vol2col - vol2col(in_slice, dilations, strides, paddings, &col); - } - - // gemm - Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); - Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); - math::matmul(filter_slice, false, col_matrix, false, - static_cast(1), &out_slice, - static_cast(1), false); + framework::DDim input_shape = framework::slice_ddim( + input->dims(), 1, static_cast(input->dims().size())); + + framework::DDim filter_matrix_shape = {filter.dims()[0], + filter.numel() / filter.dims()[0]}; + filter.Resize(filter_matrix_shape); + framework::DDim output_matrix_shape = { + output->dims()[1], + output->numel() / (output->dims()[0] * output->dims()[1])}; + + // convolution operator: im2col(or vol2col) + gemm + int in_step = static_cast(input->dims()[1]) / groups; + int out_step = static_cast(output->dims()[1]) / groups; + + math::Vol2ColFunctor vol2col; + math::Im2ColFunctor im2col; + + for (int i = 0; i < batch_size; i++) { + Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); + Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); + + for (int g = 0; g < groups; g++) { + Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); + + if (!is_expand) { + col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (data_dim == 2U) { + // im2col + im2col(in_slice, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); + } else if (data_dim == 3U) { + // vol2col + vol2col(in_slice, dilations, strides, paddings, &col); } + // gemm + Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); + Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); + math::matmul(filter_slice, false, col_matrix, false, + static_cast(1), &out_slice, + static_cast(1)); } - - auto output_ptr = output->data(); - for (int c = 0; c < output_matrix_shape[0]; c++) { - int start = c * output_matrix_shape[1]; - for (int j = 0; j < output_matrix_shape[1]; j++) { - output_ptr[start + j] = - output_ptr[start + j] * new_scale_ptr[c] + new_bias_ptr[c]; - output_ptr[start + j] = - output_ptr[start + j] < 0 ? 0 : output_ptr[start + j]; - } + } + /// todo : use neon in special case instead of 2for(300ms) + auto output_ptr = output->data(); + for (int c = 0; c < output_matrix_shape[0]; c++) { + int start = c * output_matrix_shape[1]; + for (int j = 0; j < output_matrix_shape[1]; j++) { + output_ptr[start + j] = + output_ptr[start + j] * new_scale_ptr[c] + new_bias_ptr[c]; + output_ptr[start + j] = + output_ptr[start + j] < 0 ? 0 : output_ptr[start + j]; } } } +template +void ConvAddBNReluCompute(const FusionConvAddBNReluParam ¶m) { + Tensor Bias; + Bias.mutable_data({param.Groups()}); + if (param.Groups() == param.Input()->dims()[1] && + param.Input()->dims()[1] == param.Output()->dims()[1] && + param.Filter()->dims()[2] == param.Filter()->dims()[3] && + param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) { + math::DepthwiseConvAddBNRelu3x3s1p1( + param.Input(), param.Filter(), param.Output(), &Bias, 1, + param.NewScale(), param.NewBias(), 1, 1); + } else if (0 && param.Groups() == param.Input()->dims()[1] && + param.Input()->dims()[1] == param.Output()->dims()[1] && + param.Filter()->dims()[2] == param.Filter()->dims()[3] && + param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) { + math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(), + param.Filter(), &Bias, param.Output(), false); + } else { + ConvAddBNReluBasic(param); + } +} + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/central-arm-func/conv_arm_func.h b/src/operators/kernel/central-arm-func/conv_arm_func.h index d08eebe5493bd9026073c3349631a42024579b95..6accf1937da5343a33d9dd739c125836f080f181 100644 --- a/src/operators/kernel/central-arm-func/conv_arm_func.h +++ b/src/operators/kernel/central-arm-func/conv_arm_func.h @@ -15,19 +15,19 @@ limitations under the License. */ #ifdef CONV_OP #pragma once +#include #include -#include "operators/math/conv_func.h" + #include "operators/op_param.h" namespace paddle_mobile { namespace operators { -template -void ConvCompute(const ConvParam ¶m) { +inline void ConvBasic(const ConvParam ¶m) { const Tensor *input = param.Input(); Tensor filter = *param.Filter(); Tensor *output = param.Output(); - output->mutable_data(); + int groups = param.Groups(); std::vector strides = param.Strides(); std::vector paddings = param.Paddings(); @@ -109,6 +109,27 @@ void ConvCompute(const ConvParam ¶m) { } } +template +void ConvCompute(const ConvParam ¶m) { + Tensor Bias; + Bias.mutable_data({param.Groups()}); + if (param.Groups() == param.Input()->dims()[1] && + param.Input()->dims()[1] == param.Output()->dims()[1] && + param.Filter()->dims()[2] == param.Filter()->dims()[3] && + param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) { + math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(), + &Bias, false); + } else if (param.Groups() == param.Input()->dims()[1] && + param.Input()->dims()[1] == param.Output()->dims()[1] && + param.Filter()->dims()[2] == param.Filter()->dims()[3] && + param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) { + math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(), + param.Filter(), &Bias, param.Output(), false); + } else { + ConvBasic(param); + } +} + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h b/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h index e43e3664cb005bab4d3c5ec8b5b35bd6925c982d..885f2051f645546c2585caa72aa9c80f8d352e6c 100644 --- a/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h +++ b/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h @@ -15,8 +15,10 @@ limitations under the License. */ #ifdef DEPTHWISECONV_OP #pragma once +#include #include -#include "operators/math/conv_func.h" +#include "operators/kernel/central-arm-func/conv_arm_func.h" + #include "operators/op_param.h" namespace paddle_mobile { @@ -24,89 +26,21 @@ namespace operators { template void DepthwiseConvCompute(const ConvParam ¶m) { - const Tensor *input = param.Input(); - Tensor filter = *param.Filter(); - Tensor *output = param.Output(); - output->mutable_data(); - int groups = param.Groups(); - std::vector strides = param.Strides(); - std::vector paddings = param.Paddings(); - std::vector dilations = param.Dilations(); - - // DLOG << " compute end get Attrs " << strides[0]; - - const int batch_size = static_cast(input->dims()[0]); - - std::vector filter_shape_vec(framework::vectorize(filter.dims())); - std::vector output_shape_vec(framework::vectorize(output->dims())); - size_t data_dim = filter_shape_vec.size() - 2; - std::vector col_shape_vec(1 + 2 * data_dim); - col_shape_vec[0] = input->dims()[1] / groups; - for (size_t j = 0; j < data_dim; ++j) { - col_shape_vec[j + 1] = filter_shape_vec[j + 2]; - col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; - } - framework::DDim col_shape(framework::make_ddim(col_shape_vec)); - - framework::DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, data_dim + 1); - - bool is_expand = - math::IsExpand(filter_shape_vec, strides, paddings, dilations); - Tensor col; - Tensor col_matrix; - if (is_expand) { - col.mutable_data(col_shape); - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); - } - - framework::DDim input_shape = framework::slice_ddim( - input->dims(), 1, static_cast(input->dims().size())); - - framework::DDim filter_matrix_shape = {filter.dims()[0], - filter.numel() / filter.dims()[0]}; - filter.Resize(filter_matrix_shape); - framework::DDim output_matrix_shape = { - output->dims()[1], - output->numel() / (output->dims()[0] * output->dims()[1])}; - - // convolution operator: im2col(or vol2col) + gemm - int in_step = static_cast(input->dims()[1]) / groups; - int out_step = static_cast(output->dims()[1]) / groups; - - math::Vol2ColFunctor vol2col; - math::Im2ColFunctor im2col; - - for (int i = 0; i < batch_size; i++) { - Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); - Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); - - for (int g = 0; g < groups; g++) { - Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - - if (!is_expand) { - col.ShareDataWith(in_slice); - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); - } else if (data_dim == 2U) { - // im2col - im2col(in_slice, dilations, strides, - std::vector{paddings[0], paddings[1], paddings[0], - paddings[1]}, - &col); - } else if (data_dim == 3U) { - // vol2col - vol2col(in_slice, dilations, strides, paddings, &col); - } - - // gemm - Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); - Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); - math::matmul(filter_slice, false, col_matrix, false, - static_cast(1), &out_slice, - static_cast(0)); - } + Tensor Bias; + Bias.mutable_data({param.Groups()}); + if (param.Groups() == param.Input()->dims()[1] && + param.Filter()->dims()[2] == param.Filter()->dims()[3] && + param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) { + math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(), + &Bias, false); + } else if (param.Groups() == param.Input()->dims()[1] && + param.Input()->dims()[1] == param.Output()->dims()[1] && + param.Filter()->dims()[2] == param.Filter()->dims()[3] && + param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) { + math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(), + param.Filter(), &Bias, param.Output(), false); + } else { + ConvBasic(param); } } diff --git a/src/operators/math/depthwise_conv_3x3.cpp b/src/operators/math/depthwise_conv_3x3.cpp index f74e365c7e087551e55363566d3dbd6ba530bfea..984678e8730ea58d7dc647450dd098d265f0eb39 100644 --- a/src/operators/math/depthwise_conv_3x3.cpp +++ b/src/operators/math/depthwise_conv_3x3.cpp @@ -275,33 +275,40 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter, float w22 = filter_data_tmp[8]; output_data[0] = w11 * input_data[0] + w12 * input_data[1] + - w21 * input_data[l] + w22 * input_data[l + 1] + - bias_data[j]; + w21 * input_data[l] + w22 * input_data[l + 1]; output_data[l - 1] = w10 * input_data[l - 2] + w11 * input_data[l - 1] + w20 * input_data[2 * l - 2] + - w21 * input_data[2 * l - 1] + bias_data[j]; + w21 * input_data[2 * l - 1]; output_data[(l - 1) * l] = w01 * input_data[(l - 2) * l] + w02 * input_data[(l - 2) * l + 1] + - w11 * input_data[(l - 1) * l] + w12 * input_data[(l - 1) * l + 1] + - bias_data[j]; + w11 * input_data[(l - 1) * l] + w12 * input_data[(l - 1) * l + 1]; output_data[l * l - 1] = w00 * input_data[(l - 2) * (l + 1)] + w01 * input_data[(l - 2) * (l + 1) + 1] + w10 * input_data[l * l - 2] + - w11 * input_data[l * l - 1] + bias_data[j]; + w11 * input_data[l * l - 1]; + if (if_bias) { + output_data[0] += bias_data[j]; + output_data[l - 1] += bias_data[j]; + output_data[(l - 1) * l] += bias_data[j]; + output_data[l * l - 1] += bias_data[j]; + } for (int i = 1; i < l - 1; ++i) { output_data[i * l] = w01 * input_data[i * l - l] + w02 * input_data[i * l - l + 1] + w11 * input_data[i * l] + w12 * input_data[i * l + 1] + - w21 * input_data[i * l + l] + w22 * input_data[i * l + l + 1] + - bias_data[j]; + w21 * input_data[i * l + l] + w22 * input_data[i * l + l + 1]; + output_data[i * l + l - 1] = w00 * input_data[i * l + l - 1 - l - 1] + w01 * input_data[i * l + l - 1 - l] + w10 * input_data[i * l + l - 1 - 1] + w11 * input_data[i * l + l - 1] + w20 * input_data[i * l + l - 1 + l - 1] + - w21 * input_data[i * l + l - 1 + l] + - bias_data[j]; + w21 * input_data[i * l + l - 1 + l]; + if (if_bias) { + output_data[i * l] += bias_data[j]; + output_data[i * l + l - 1] += bias_data[j]; + } } // top 1 row and bottom 1 row @@ -502,12 +509,14 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter, } } } -void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, Tensor filter, + +void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter, Tensor *output, Tensor *bias, bool if_bias, - Tensor *new_scale, Tensor *new_bias, - bool if_bn, bool if_relu) { + const Tensor *new_scale, + const Tensor *new_bias, bool if_bn, + bool if_relu) { const float *input_data = input->data(); - const float *filter_data = filter.data(); + const float *filter_data = filter->data(); float *output_data = output->data(); const float *bias_data = bias->data(); const float *newscale_data = new_scale->data(); @@ -547,29 +556,35 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, Tensor filter, float w21 = filter_data_tmp[7]; float w22 = filter_data_tmp[8]; - output_data[0] = - (w11 * input_data[0] + w12 * input_data[1] + w21 * input_data[l] + - w22 * input_data[l + 1] + bias_data[j]) * - newscale_data[j] + - newbias_data[j]; - output_data[l - 1] = (w10 * input_data[l - 2] + w11 * input_data[l - 1] + - w20 * input_data[2 * l - 2] + - w21 * input_data[2 * l - 1] + bias_data[j]) * - newscale_data[j] + - newbias_data[j]; + output_data[0] = w11 * input_data[0] + w12 * input_data[1] + + w21 * input_data[l] + w22 * input_data[l + 1]; + + output_data[l - 1] = w10 * input_data[l - 2] + w11 * input_data[l - 1] + + w20 * input_data[2 * l - 2] + + w21 * input_data[2 * l - 1]; output_data[(l - 1) * l] = - (w01 * input_data[(l - 2) * l] + w02 * input_data[(l - 2) * l + 1] + - w11 * input_data[(l - 1) * l] + w12 * input_data[(l - 1) * l + 1] + - bias_data[j]) * - newscale_data[j] + - newbias_data[j]; - output_data[l * l - 1] = (w00 * input_data[(l - 2) * (l + 1)] + - w01 * input_data[(l - 2) * (l + 1) + 1] + - w10 * input_data[l * l - 2] + - w11 * input_data[l * l - 1] + bias_data[j]) * - newscale_data[j] + - newbias_data[j]; + w01 * input_data[(l - 2) * l] + w02 * input_data[(l - 2) * l + 1] + + w11 * input_data[(l - 1) * l] + w12 * input_data[(l - 1) * l + 1]; + output_data[l * l - 1] = w00 * input_data[(l - 2) * (l + 1)] + + w01 * input_data[(l - 2) * (l + 1) + 1] + + w10 * input_data[l * l - 2] + + w11 * input_data[l * l - 1]; + if (if_bias) { + output_data[0] += bias_data[j]; + output_data[l - 1] += bias_data[j]; + output_data[(l - 1) * l] += bias_data[j]; + output_data[l * l - 1] += bias_data[j]; + } + if (if_bn) { + output_data[0] = output_data[0] * newscale_data[j] + newbias_data[j]; + output_data[l - 1] = + output_data[l - 1] * newscale_data[j] + newbias_data[j]; + output_data[(l - 1) * l] = + output_data[(l - 1) * l] * newscale_data[j] + newbias_data[j]; + output_data[l * l - 1] = + output_data[l * l - 1] * newscale_data[j] + newbias_data[j]; + } if (if_relu) { output_data[0] = output_data[0] < 0 ? 0 : output_data[0]; output_data[l - 1] = output_data[l - 1] < 0 ? 0 : output_data[l - 1]; @@ -580,21 +595,25 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, Tensor filter, } for (int i = 1; i < l - 1; ++i) { output_data[i * l] = - (w01 * input_data[i * l - l] + w02 * input_data[i * l - l + 1] + - w11 * input_data[i * l] + w12 * input_data[i * l + 1] + - w21 * input_data[i * l + l] + w22 * input_data[i * l + l + 1] + - bias_data[j]) * - newscale_data[j] + - newbias_data[j]; - output_data[i * l + l - 1] = - (w00 * input_data[i * l + l - 1 - l - 1] + - w01 * input_data[i * l + l - 1 - l] + - w10 * input_data[i * l + l - 1 - 1] + - w11 * input_data[i * l + l - 1] + - w20 * input_data[i * l + l - 1 + l - 1] + - w21 * input_data[i * l + l - 1 + l] + bias_data[j]) * - newscale_data[j] + - newbias_data[j]; + w01 * input_data[i * l - l] + w02 * input_data[i * l - l + 1] + + w11 * input_data[i * l] + w12 * input_data[i * l + 1] + + w21 * input_data[i * l + l] + w22 * input_data[i * l + l + 1]; + output_data[i * l + l - 1] = w00 * input_data[i * l + l - 1 - l - 1] + + w01 * input_data[i * l + l - 1 - l] + + w10 * input_data[i * l + l - 1 - 1] + + w11 * input_data[i * l + l - 1] + + w20 * input_data[i * l + l - 1 + l - 1] + + w21 * input_data[i * l + l - 1 + l]; + if (if_bias) { + output_data[i * l] += bias_data[j]; + output_data[i * l + l - 1] += bias_data[j]; + } + if (if_bn) { + output_data[i * l] = + output_data[i * l] * newscale_data[j] + newbias_data[j]; + output_data[i * l + l - 1] = + output_data[i * l + l - 1] * newscale_data[j] + newbias_data[j]; + } if (if_relu) { output_data[i * l] = output_data[i * l] < 0 ? 0 : output_data[i * l]; output_data[i * l + l - 1] = diff --git a/src/operators/math/depthwise_conv_3x3.h b/src/operators/math/depthwise_conv_3x3.h index 44299295eebad6a90fd994cf74589c09a3573aee..a0beb479926902a71b7e06128aa8cecdd5443196 100644 --- a/src/operators/math/depthwise_conv_3x3.h +++ b/src/operators/math/depthwise_conv_3x3.h @@ -32,10 +32,11 @@ void DepthwiseConv3x3(const Tensor *input, vector strides, Tensor *output, bool if_bias); void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter, Tensor *output, Tensor *bias, bool if_bias); -void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, Tensor filter, +void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter, Tensor *output, Tensor *bias, bool if_bias, - Tensor *new_scale, Tensor *new_bias, - bool if_bn, bool if_relu); + const Tensor *new_scale, + const Tensor *new_bias, bool if_bn, + bool if_relu); } // namespace math } // namespace operators } // namespace paddle_mobile