diff --git a/src/operators/kernel/arm/batchnorm_kernel.cpp b/src/operators/kernel/arm/batchnorm_kernel.cpp
index 68e0c7fa1e6996534ef87f771c9f1a3fb924224f..ecebc009bc36542f54578c881716d5fa92c04b7b 100644
--- a/src/operators/kernel/arm/batchnorm_kernel.cpp
+++ b/src/operators/kernel/arm/batchnorm_kernel.cpp
@@ -17,7 +17,7 @@ limitations under the License. */
 #pragma once
 
 #include "operators/kernel/batchnorm_kernel.h"
-#include "operators/kernel/central-arm-func/batchnorm_func.h"
+#include "operators/kernel/central-arm-func/batchnorm_arm_func.h"
 
 namespace paddle_mobile {
 namespace operators {
diff --git a/src/operators/kernel/arm/conv_add_relu_kernel.cpp b/src/operators/kernel/arm/conv_add_relu_kernel.cpp
index 0ff86c7344fed8e4060b8d46b7ff457b031479d6..d3c04179b37014adc6c81f32dd6c08f697283671 100644
--- a/src/operators/kernel/arm/conv_add_relu_kernel.cpp
+++ b/src/operators/kernel/arm/conv_add_relu_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVADD_RELU_OP
 
 #include "operators/kernel/conv_add_relu_kernel.h"
-#include "operators/kernel/central-arm-func/conv_add_relu_func.h"
+#include "operators/kernel/central-arm-func/conv_add_relu_arm_func.h"
 
 namespace paddle_mobile {
 namespace operators {
diff --git a/src/operators/kernel/arm/conv_kernel.cpp b/src/operators/kernel/arm/conv_kernel.cpp
index 45fae59a838d4d5fd44a94559cdf60b615f5b924..049425d88f96a322a0b4cb47c18d85f2df03d577 100644
--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef CONV_OP
 
 #include "operators/kernel/conv_kernel.h"
-#include "operators/kernel/central-arm-func/conv_func.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"
 
 namespace paddle_mobile {
 namespace operators {
diff --git a/src/operators/kernel/arm/depthwise_conv_kernel.cpp b/src/operators/kernel/arm/depthwise_conv_kernel.cpp
index 6cd16fda0dc6ea9928ab9fcfac5cd8b3c31a15a2..4cbfa23248e87e2bf3a8d97330fa19f92985a9d0 100644
--- a/src/operators/kernel/arm/depthwise_conv_kernel.cpp
+++ b/src/operators/kernel/arm/depthwise_conv_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef DEPTHWISECONV_OP
 
 #include "operators/kernel/depthwise_conv_kernel.h"
-#include "operators/kernel/conv_kernel.h"
+#include "operators/kernel/central-arm-func/depthwise_conv_arm_func.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -27,91 +27,7 @@ bool DepthwiseConvKernel<CPU, float>::Init(const ConvParam &para) const {
 
 template <>
 void DepthwiseConvKernel<CPU, float>::Compute(const ConvParam &param) const {
-  LOG(kLOG_DEBUG) << param;
-
-  const Tensor *input = param.Input();
-  Tensor filter = *param.Filter();
-  Tensor *output = param.Output();
-  output->mutable_data<float>();
-  int groups = param.Groups();
-  std::vector<int> strides = param.Strides();
-  std::vector<int> paddings = param.Paddings();
-  std::vector<int> dilations = param.Dilations();
-
-  // DLOG << " compute end get Attrs " << strides[0];
-
-  const int batch_size = static_cast<int>(input->dims()[0]);
-
-  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
-  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
-  size_t data_dim = filter_shape_vec.size() - 2;
-  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
-  col_shape_vec[0] = input->dims()[1] / groups;
-  for (size_t j = 0; j < data_dim; ++j) {
-    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
-    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
-  }
-  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
-
-  framework::DDim col_matrix_shape =
-      framework::flatten_to_2d(col_shape, data_dim + 1);
-
-  bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
-  Tensor col;
-  Tensor col_matrix;
-  if (is_expand) {
-    col.mutable_data<float>(col_shape);
-    col_matrix.ShareDataWith(col);
-    col_matrix.Resize(col_matrix_shape);
-  }
-
-  framework::DDim input_shape = framework::slice_ddim(
-      input->dims(), 1, static_cast<int>(input->dims().size()));
-
-  framework::DDim filter_matrix_shape = {filter.dims()[0],
-                                         filter.numel() / filter.dims()[0]};
-  filter.Resize(filter_matrix_shape);
-  framework::DDim output_matrix_shape = {
-      output->dims()[1],
-      output->numel() / (output->dims()[0] * output->dims()[1])};
-
-  // convolution operator: im2col(or vol2col) + gemm
-  int in_step = static_cast<int>(input->dims()[1]) / groups;
-  int out_step = static_cast<int>(output->dims()[1]) / groups;
-
-  math::Vol2ColFunctor<CPU, float> vol2col;
-  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
-
-  for (int i = 0; i < batch_size; i++) {
-    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
-    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
-
-    for (int g = 0; g < groups; g++) {
-      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
-
-      if (!is_expand) {
-        col.ShareDataWith(in_slice);
-        col_matrix.ShareDataWith(col);
-        col_matrix.Resize(col_matrix_shape);
-      } else if (data_dim == 2U) {
-        // im2col
-        im2col(in_slice, dilations, strides,
-               std::vector<int>{paddings[0], paddings[1], paddings[0],
-                                paddings[1]},
-               &col);
-      } else if (data_dim == 3U) {
-        // vol2col
-        vol2col(in_slice, dilations, strides, paddings, &col);
-      }
-
-      // gemm
-      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
-      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
-      math::matmul<float>(filter_slice, false, col_matrix, false,
-                          static_cast<float>(1), &out_slice,
-                          static_cast<float>(0));
-    }
-  }
+  DepthwiseConvCompute<float>(param);
 }
 
 template class DepthwiseConvKernel<CPU, float>;
diff --git a/src/operators/kernel/central-arm-func/batchnorm_func.h b/src/operators/kernel/central-arm-func/batchnorm_arm_func.h
similarity index 100%
rename from src/operators/kernel/central-arm-func/batchnorm_func.h
rename to src/operators/kernel/central-arm-func/batchnorm_arm_func.h
diff --git a/src/operators/kernel/central-arm-func/conv_add_relu_func.h b/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
similarity index 99%
rename from src/operators/kernel/central-arm-func/conv_add_relu_func.h
rename to src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
index 416b4963aefdbd6f2c796378aec4b953a08e28cb..6aadbab95c591d4286fdbb3c3f01a291cdd90429 100644
--- a/src/operators/kernel/central-arm-func/conv_add_relu_func.h
+++ b/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVADD_RELU_OP
 
 #pragma once
+#include <vector>
 #include "operators/op_param.h"
 
 namespace paddle_mobile {
diff --git a/src/operators/kernel/central-arm-func/conv_func.h b/src/operators/kernel/central-arm-func/conv_arm_func.h
similarity index 96%
rename from src/operators/kernel/central-arm-func/conv_func.h
rename to src/operators/kernel/central-arm-func/conv_arm_func.h
index 30cfb24043b32effb723f029ef7e5e5cdd1f1e99..d08eebe5493bd9026073c3349631a42024579b95 100644
--- a/src/operators/kernel/central-arm-func/conv_func.h
+++ b/src/operators/kernel/central-arm-func/conv_arm_func.h
@@ -15,6 +15,8 @@ limitations under the License. */
 #ifdef CONV_OP
 
 #pragma once
+#include <vector>
+#include "operators/math/conv_func.h"
 #include "operators/op_param.h"
 
 namespace paddle_mobile {
@@ -48,7 +50,8 @@ void ConvCompute(const ConvParam &param) {
   framework::DDim col_matrix_shape =
       framework::flatten_to_2d(col_shape, data_dim + 1);
 
-  bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
+  bool is_expand =
+      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
   Tensor col;
   Tensor col_matrix;
   if (is_expand) {
diff --git a/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h b/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
new file mode 100644
index 0000000000000000000000000000000000000000..e43e3664cb005bab4d3c5ec8b5b35bd6925c982d
--- /dev/null
+++ b/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
@@ -0,0 +1,116 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef DEPTHWISECONV_OP
+
+#pragma once
+#include <vector>
+#include "operators/math/conv_func.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename P>
+void DepthwiseConvCompute(const ConvParam &param) {
+  const Tensor *input = param.Input();
+  Tensor filter = *param.Filter();
+  Tensor *output = param.Output();
+  output->mutable_data<float>();
+  int groups = param.Groups();
+  std::vector<int> strides = param.Strides();
+  std::vector<int> paddings = param.Paddings();
+  std::vector<int> dilations = param.Dilations();
+
+  // DLOG << " compute end get Attrs " << strides[0];
+
+  const int batch_size = static_cast<int>(input->dims()[0]);
+
+  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
+  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
+  size_t data_dim = filter_shape_vec.size() - 2;
+  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
+  col_shape_vec[0] = input->dims()[1] / groups;
+  for (size_t j = 0; j < data_dim; ++j) {
+    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
+    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
+  }
+  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
+
+  framework::DDim col_matrix_shape =
+      framework::flatten_to_2d(col_shape, data_dim + 1);
+
+  bool is_expand =
+      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
+  Tensor col;
+  Tensor col_matrix;
+  if (is_expand) {
+    col.mutable_data<float>(col_shape);
+    col_matrix.ShareDataWith(col);
+    col_matrix.Resize(col_matrix_shape);
+  }
+
+  framework::DDim input_shape = framework::slice_ddim(
+      input->dims(), 1, static_cast<int>(input->dims().size()));
+
+  framework::DDim filter_matrix_shape = {filter.dims()[0],
+                                         filter.numel() / filter.dims()[0]};
+  filter.Resize(filter_matrix_shape);
+  framework::DDim output_matrix_shape = {
+      output->dims()[1],
+      output->numel() / (output->dims()[0] * output->dims()[1])};
+
+  // convolution operator: im2col(or vol2col) + gemm
+  int in_step = static_cast<int>(input->dims()[1]) / groups;
+  int out_step = static_cast<int>(output->dims()[1]) / groups;
+
+  math::Vol2ColFunctor<CPU, float> vol2col;
+  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
+
+  for (int i = 0; i < batch_size; i++) {
+    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
+    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
+
+    for (int g = 0; g < groups; g++) {
+      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
+
+      if (!is_expand) {
+        col.ShareDataWith(in_slice);
+        col_matrix.ShareDataWith(col);
+        col_matrix.Resize(col_matrix_shape);
+      } else if (data_dim == 2U) {
+        // im2col
+        im2col(in_slice, dilations, strides,
+               std::vector<int>{paddings[0], paddings[1], paddings[0],
+                                paddings[1]},
+               &col);
+      } else if (data_dim == 3U) {
+        // vol2col
+        vol2col(in_slice, dilations, strides, paddings, &col);
+      }
+
+      // gemm
+      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
+      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
+      math::matmul<float>(filter_slice, false, col_matrix, false,
+                          static_cast<float>(1), &out_slice,
+                          static_cast<float>(0));
+    }
+  }
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/conv_kernel.h b/src/operators/kernel/conv_kernel.h
index 8efdc0bc067927d4a0f0d7fcef56d8a732e0b848..812ddd5a441f3a24c557546c1780248a557a6eb0 100644
--- a/src/operators/kernel/conv_kernel.h
+++ b/src/operators/kernel/conv_kernel.h
@@ -35,21 +35,6 @@ class ConvKernel : public OpKernelBase<DeviceType, ConvParam> {
   bool Init(const ConvParam &para) const;
 };
 
-inline bool IsExpand(const std::vector<int64_t> &filter_dim,
-                     const std::vector<int> &strides,
-                     const std::vector<int> &paddings,
-                     const std::vector<int> &dilations) {
-  bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
-  for (size_t j = 0; j < strides.size(); ++j) {
-    filter_1 = filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
-    strides_1 = strides_1 && (strides[j] == 1);
-    padding_0 = padding_0 && (paddings[j] == 0);
-    dilation_1 = dilation_1 && (dilations[j] == 1);
-  }
-
-  return !(filter_1 && strides_1 && padding_0 && dilation_1);
-}
-
 }  // namespace operators
 }  // namespace paddle_mobile
 