Patch summary (text before the first "diff --git" line is ignored by patch tools):
  * conv_arm_func.h: the im2col/vol2col + matmul path becomes ConvBasic(); the new
    ConvCompute() calls math::DepthwiseConv3x3s1p1 (Strides()[0] == 1) or
    math::DepthwiseConv3x3 (Strides()[0] == 2) when Groups() equals the input
    channel count, input and output channel counts match and the filter is 3x3,
    and falls back to ConvBasic() otherwise.
  * depthwise_conv_arm_func.h: DepthwiseConvCompute() drops its own copy of the
    matmul path and uses the same dispatch, falling back to ConvBasic().
  * depthwise_conv_3x3.cpp: DepthwiseConv3x3s1p1 adds bias_data[j] to the corner
    and left/right edge outputs only when if_bias is true.
  * tools/push2android.sh: new script that runs "build.sh android" and pushes
    build outputs, images and models to the device with adb.

Edits made while restoring this patch to multi-line form:
  * "&param" restored where the text had been mis-encoded.
  * DepthwiseConvCompute: the stride-1 branch now also requires
    Input()->dims()[1] == Output()->dims()[1], matching its stride-2 branch and
    ConvCompute (hunk header count updated accordingly).
  * push2android.sh: "[[ ... ]]" / "==" replaced by POSIX "[ ... ]" / "=" with a
    quoted "$1", because the script's shebang is sh.

NOTE(review): indentation and line wrapping were rebuilt from the +/- markers and
hunk line counts. Several "<...>" spans (include targets, template parameter
lists, cast/template arguments) appear to be missing from this copy and are left
as found; confirm against the original commit before applying.

diff --git a/src/operators/kernel/central-arm-func/conv_arm_func.h b/src/operators/kernel/central-arm-func/conv_arm_func.h
index d08eebe5493bd9026073c3349631a42024579b95..c4fa086daaee75a9076fecd4b09dbe66adab96db 100644
--- a/src/operators/kernel/central-arm-func/conv_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_arm_func.h
@@ -15,19 +15,17 @@ limitations under the License. */
 #ifdef CONV_OP
 
 #pragma once
+#include
 #include
-#include "operators/math/conv_func.h"
 #include "operators/op_param.h"
 
 namespace paddle_mobile {
 namespace operators {
 
-template
-void ConvCompute(const ConvParam &param) {
+inline void ConvBasic(const ConvParam &param) {
   const Tensor *input = param.Input();
   Tensor filter = *param.Filter();
   Tensor *output = param.Output();
-  output->mutable_data();
   int groups = param.Groups();
   std::vector strides = param.Strides();
   std::vector paddings = param.Paddings();
@@ -98,17 +96,37 @@ void ConvCompute(const ConvParam &param) {
         // vol2col
         vol2col(in_slice, dilations, strides, paddings, &col);
       }
-
       // gemm
       Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
       Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
       math::matmul(filter_slice, false, col_matrix, false,
                    static_cast(1), &out_slice,
-                   static_cast(0));
+                   static_cast(1));
     }
   }
 }
 
+template
+void ConvCompute(const ConvParam &param) {
+  Tensor Bias;
+  Bias.mutable_data({param.Groups()});
+  if (param.Groups() == param.Input()->dims()[1] &&
+      param.Input()->dims()[1] == param.Output()->dims()[1] &&
+      param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
+      param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
+    math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(),
+                               &Bias, false);
+  } else if (param.Groups() == param.Input()->dims()[1] &&
+             param.Input()->dims()[1] == param.Output()->dims()[1] &&
+             param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
+             param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) {
+    math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(),
+                           param.Filter(), &Bias, param.Output(), false);
+  } else {
+    ConvBasic(param);
+  }
+}
+
 }  // namespace operators
 }  // namespace paddle_mobile
 
diff --git a/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h b/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
index e43e3664cb005bab4d3c5ec8b5b35bd6925c982d..f2c898dbb41f3f94858189ca7d9abe9d2d8ab03e 100644
--- a/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
+++ b/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
@@ -15,8 +15,9 @@ limitations under the License. */
 #ifdef DEPTHWISECONV_OP
 
 #pragma once
+#include
 #include
-#include "operators/math/conv_func.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"
 #include "operators/op_param.h"
 
 namespace paddle_mobile {
@@ -24,89 +25,22 @@ namespace operators {
 
 template
 void DepthwiseConvCompute(const ConvParam &param) {
-  const Tensor *input = param.Input();
-  Tensor filter = *param.Filter();
-  Tensor *output = param.Output();
-  output->mutable_data();
-  int groups = param.Groups();
-  std::vector strides = param.Strides();
-  std::vector paddings = param.Paddings();
-  std::vector dilations = param.Dilations();
-
-  // DLOG << " compute end get Attrs " << strides[0];
-
-  const int batch_size = static_cast(input->dims()[0]);
-
-  std::vector filter_shape_vec(framework::vectorize(filter.dims()));
-  std::vector output_shape_vec(framework::vectorize(output->dims()));
-  size_t data_dim = filter_shape_vec.size() - 2;
-  std::vector col_shape_vec(1 + 2 * data_dim);
-  col_shape_vec[0] = input->dims()[1] / groups;
-  for (size_t j = 0; j < data_dim; ++j) {
-    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
-    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
-  }
-  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
-
-  framework::DDim col_matrix_shape =
-      framework::flatten_to_2d(col_shape, data_dim + 1);
-
-  bool is_expand =
-      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
-  Tensor col;
-  Tensor col_matrix;
-  if (is_expand) {
-    col.mutable_data(col_shape);
-    col_matrix.ShareDataWith(col);
-    col_matrix.Resize(col_matrix_shape);
-  }
-
-  framework::DDim input_shape = framework::slice_ddim(
-      input->dims(), 1, static_cast(input->dims().size()));
-
-  framework::DDim filter_matrix_shape = {filter.dims()[0],
-                                         filter.numel() / filter.dims()[0]};
-  filter.Resize(filter_matrix_shape);
-  framework::DDim output_matrix_shape = {
-      output->dims()[1],
-      output->numel() / (output->dims()[0] * output->dims()[1])};
-
-  // convolution operator: im2col(or vol2col) + gemm
-  int in_step = static_cast(input->dims()[1]) / groups;
-  int out_step = static_cast(output->dims()[1]) / groups;
-
-  math::Vol2ColFunctor vol2col;
-  math::Im2ColFunctor im2col;
-
-  for (int i = 0; i < batch_size; i++) {
-    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
-    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
-
-    for (int g = 0; g < groups; g++) {
-      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
-
-      if (!is_expand) {
-        col.ShareDataWith(in_slice);
-        col_matrix.ShareDataWith(col);
-        col_matrix.Resize(col_matrix_shape);
-      } else if (data_dim == 2U) {
-        // im2col
-        im2col(in_slice, dilations, strides,
-               std::vector{paddings[0], paddings[1], paddings[0],
-                           paddings[1]},
-               &col);
-      } else if (data_dim == 3U) {
-        // vol2col
-        vol2col(in_slice, dilations, strides, paddings, &col);
-      }
-
-      // gemm
-      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
-      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
-      math::matmul(filter_slice, false, col_matrix, false,
-                   static_cast(1), &out_slice,
-                   static_cast(0));
-    }
+  Tensor Bias;
+  Bias.mutable_data({param.Groups()});
+  if (param.Groups() == param.Input()->dims()[1] &&
+      param.Input()->dims()[1] == param.Output()->dims()[1] &&
+      param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
+      param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
+    math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(),
+                               &Bias, false);
+  } else if (param.Groups() == param.Input()->dims()[1] &&
+             param.Input()->dims()[1] == param.Output()->dims()[1] &&
+             param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
+             param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) {
+    math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(),
+                           param.Filter(), &Bias, param.Output(), false);
+  } else {
+    ConvBasic(param);
   }
 }
 
diff --git a/src/operators/math/depthwise_conv_3x3.cpp b/src/operators/math/depthwise_conv_3x3.cpp
index f74e365c7e087551e55363566d3dbd6ba530bfea..7d85ed1b6b680a3ce3fa8ce6061fe387fbb2f298 100644
--- a/src/operators/math/depthwise_conv_3x3.cpp
+++ b/src/operators/math/depthwise_conv_3x3.cpp
@@ -275,33 +275,38 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
       float w22 = filter_data_tmp[8];
 
       output_data[0] = w11 * input_data[0] + w12 * input_data[1] +
-                       w21 * input_data[l] + w22 * input_data[l + 1] +
-                       bias_data[j];
+                       w21 * input_data[l] + w22 * input_data[l + 1];
       output_data[l - 1] = w10 * input_data[l - 2] + w11 * input_data[l - 1] +
                            w20 * input_data[2 * l - 2] +
-                           w21 * input_data[2 * l - 1] + bias_data[j];
+                           w21 * input_data[2 * l - 1];
       output_data[(l - 1) * l] =
           w01 * input_data[(l - 2) * l] + w02 * input_data[(l - 2) * l + 1] +
-          w11 * input_data[(l - 1) * l] + w12 * input_data[(l - 1) * l + 1] +
-          bias_data[j];
+          w11 * input_data[(l - 1) * l] + w12 * input_data[(l - 1) * l + 1];
       output_data[l * l - 1] = w00 * input_data[(l - 2) * (l + 1)] +
                                w01 * input_data[(l - 2) * (l + 1) + 1] +
                                w10 * input_data[l * l - 2] +
-                               w11 * input_data[l * l - 1] + bias_data[j];
-
+                               w11 * input_data[l * l - 1];
+      if (if_bias) {
+        output_data[0] += bias_data[j];
+        output_data[l - 1] += bias_data[j];
+        output_data[(l - 1) * l] += bias_data[j];
+        output_data[l * l - 1] += bias_data[j];
+      }
       for (int i = 1; i < l - 1; ++i) {
         output_data[i * l] =
             w01 * input_data[i * l - l] + w02 * input_data[i * l - l + 1] +
             w11 * input_data[i * l] + w12 * input_data[i * l + 1] +
-            w21 * input_data[i * l + l] + w22 * input_data[i * l + l + 1] +
-            bias_data[j];
+            w21 * input_data[i * l + l] + w22 * input_data[i * l + l + 1];
         output_data[i * l + l - 1] = w00 * input_data[i * l + l - 1 - l - 1] +
                                      w01 * input_data[i * l + l - 1 - l] +
                                      w10 * input_data[i * l + l - 1 - 1] +
                                      w11 * input_data[i * l + l - 1] +
                                      w20 * input_data[i * l + l - 1 + l - 1] +
-                                     w21 * input_data[i * l + l - 1 + l] +
-                                     bias_data[j];
+                                     w21 * input_data[i * l + l - 1 + l];
+        if (if_bias) {
+          output_data[i * l] += bias_data[j];
+          output_data[i * l + l - 1] += bias_data[j];
+        }
       }
 
       // top 1 row and bottom 1 row
@@ -502,6 +507,7 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
     }
   }
 }
+
 void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, Tensor filter,
                                    Tensor *output, Tensor *bias, bool if_bias,
                                    Tensor *new_scale, Tensor *new_bias,
diff --git a/tools/push2android.sh b/tools/push2android.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cc5ac1479b9579c59bbd92cb6849f275527de5b3
--- /dev/null
+++ b/tools/push2android.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env sh
+
+push_fn () {
+sh build.sh android
+MODELS_PATH="../test/models/*"
+MODELS_SRC="../test/models"
+IMAGE_PATH="../test/images/*"
+EXE_FILE="../test/build/*"
+EXE_DIR="data/local/tmp/bin"
+adb shell mkdir ${EXE_DIR}
+MODELS_DIR="data/local/tmp/models"
+adb shell mkdir ${MODELS_DIR}
+for file in `ls ${MODELS_SRC}`
+do
+    adb shell mkdir ${MODELS_DIR}"/"${file}
+done
+
+if [ -d "../src/operators/kernel/mali/ACL_Android/build" ]; then
+ACL_BUILD_PATH="../src/operators/kernel/mali/ACL_Android/build/*"
+adb push ${ACL_BUILD_PATH} ${EXE_DIR}
+fi
+
+IMAGES_DIR="data/local/tmp/images"
+adb shell mkdir ${IMAGES_DIR}
+LIB_PATH="../build/release/arm-v7a/build/*"
+adb push ${EXE_FILE} ${EXE_DIR}
+adb push ${LIB_PATH} ${EXE_DIR}
+if [ "$1" != "npm" ]; then
+adb push ${IMAGE_PATH} ${IMAGES_DIR}
+adb push ${MODELS_PATH} ${MODELS_DIR}
+fi
+}
+
+if [ "$1" = "npm" ]; then
+push_fn "$1"
+else
+push_fn
+fi