提交 68e8dc4a 编写于 作者: H hjchen2

Support winograd algo to speed up 3x3 convolution operator

上级 4af571c4
...@@ -22,6 +22,7 @@ limitations under the License. */ ...@@ -22,6 +22,7 @@ limitations under the License. */
#include "operators/math/math_function.h" #include "operators/math/math_function.h"
#include "operators/math/pad.h" #include "operators/math/pad.h"
#include "operators/math/vol2col.h" #include "operators/math/vol2col.h"
#include "operators/math/winograd/winograd.h"
#include "operators/op_param.h" #include "operators/op_param.h"
namespace paddle_mobile { namespace paddle_mobile {
...@@ -116,6 +117,34 @@ inline void ConvBasic(const ConvParam<CPU> &param) { ...@@ -116,6 +117,34 @@ inline void ConvBasic(const ConvParam<CPU> &param) {
} }
} }
// Runs a winograd F(6x6, 3x3) convolution one batch at a time.
// Each input slice is zero-padded (when paddings are non-zero) and then fed
// to the winograd kernel. The caller guarantees: 3x3 square filter,
// stride 1, dilation 1, NCHW layout (see the dispatch in ConvCompute).
inline void BatchConv3x3Winograd(const ConvParam<CPU> &param) {
  const Tensor *input = param.Input();
  Tensor *filter = param.Filter();
  Tensor *output = param.Output();
  output->mutable_data<float>();
  int batch_size = input->dims()[0];
  const std::vector<int> &paddings = param.Paddings();
  math::PadFunctor<CPU, float> pad;
  Tensor input_pad;
  for (int i = 0; i < batch_size; ++i) {
    Tensor in_batch = input->Slice(i, i + 1);
    Tensor out_batch = output->Slice(i, i + 1);
    if (paddings[0] == 0 && paddings[1] == 0) {
      // no padding needed: operate on the input slice directly
      input_pad = in_batch;
    } else {
      // grow height by paddings[0] and width by paddings[1] on both sides,
      // then copy the slice into the zero-filled padded buffer
      framework::DDim pad_shape = in_batch.dims();
      pad_shape[2] += 2 * paddings[0];
      pad_shape[3] += 2 * paddings[1];
      input_pad.mutable_data<float>(pad_shape);
      pad(in_batch, paddings[0], paddings[0], paddings[1], paddings[1],
          &input_pad);
    }
    // NOTE(review): winograd_f6k3 re-transforms the weights on every
    // iteration; hoisting the weight transform out of this loop would
    // avoid redundant work when batch_size > 1.
    math::winograd_f6k3(input_pad, *filter, &out_batch);
  }
}
template <typename P> template <typename P>
void ConvCompute(const ConvParam<CPU> &param) { void ConvCompute(const ConvParam<CPU> &param) {
if (param.Input()->type() == typeid(int8_t)) { if (param.Input()->type() == typeid(int8_t)) {
...@@ -133,6 +162,12 @@ void ConvCompute(const ConvParam<CPU> &param) { ...@@ -133,6 +162,12 @@ void ConvCompute(const ConvParam<CPU> &param) {
param.Filter()->dims()[2] == 3) { param.Filter()->dims()[2] == 3) {
math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(), math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(),
param.Filter(), nullptr, param.Output(), false); param.Filter(), nullptr, param.Output(), false);
} else if (param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Strides()[0] == param.Strides()[1] &&
param.Dilations()[0] == param.Dilations()[1] &&
param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1 &&
param.Dilations()[0] == 1 && param.Input()->dims()[1] > 16) {
BatchConv3x3Winograd(param);
} else { } else {
ConvBasic<float, float>(param); ConvBasic<float, float>(param);
} }
......
...@@ -249,7 +249,7 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter, ...@@ -249,7 +249,7 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
#if __ARM_NEON #if __ARM_NEON
const float *input_data = input->data<float>(); const float *input_data = input->data<float>();
const float *filter_data = filter->data<float>(); const float *filter_data = filter->data<float>();
float *output_data = output->data<float>(); float *output_data = output->mutable_data<float>();
const float *bias_data; const float *bias_data;
if (if_bias) { if (if_bias) {
bias_data = bias->data<float>(); bias_data = bias->data<float>();
......
...@@ -21,10 +21,12 @@ namespace math { ...@@ -21,10 +21,12 @@ namespace math {
template <typename T> template <typename T>
class PadFunctor<CPU, T> { class PadFunctor<CPU, T> {
public: public:
void operator()(const framework::Tensor &input, const int pad_h, void operator()(const framework::Tensor &input, const int pad_top,
const int pad_w, framework::Tensor *output) { const int pad_bottom, const int pad_left, const int pad_right,
framework::Tensor *output) {
const T *in_data = input.data<T>(); const T *in_data = input.data<T>();
T *out_data = output->mutable_data<T>(); T *out_data = output->mutable_data<T>();
// should check output shape is valid for such pad parameters
const framework::DDim &input_shape = input.dims(); const framework::DDim &input_shape = input.dims();
const framework::DDim &output_shape = output->dims(); const framework::DDim &output_shape = output->dims();
// fill output with 0 // fill output with 0
...@@ -32,13 +34,13 @@ class PadFunctor<CPU, T> { ...@@ -32,13 +34,13 @@ class PadFunctor<CPU, T> {
// should make sure the shape of output is match with input // should make sure the shape of output is match with input
for (int i = 0; i < input_shape[0]; ++i) { for (int i = 0; i < input_shape[0]; ++i) {
for (int c = 0; c < input_shape[1]; ++c) { for (int c = 0; c < input_shape[1]; ++c) {
out_data += pad_h * output_shape[3]; out_data += pad_top * output_shape[3];
for (int h = 0; h < input_shape[2]; ++h) { for (int h = 0; h < input_shape[2]; ++h) {
memcpy(out_data + pad_w, in_data, sizeof(T) * input_shape[3]); memcpy(out_data + pad_left, in_data, sizeof(T) * input_shape[3]);
out_data += output_shape[3]; out_data += output_shape[3];
in_data += input_shape[3]; in_data += input_shape[3];
} }
out_data += pad_h * output_shape[3]; out_data += pad_bottom * output_shape[3];
} }
} }
} }
......
...@@ -22,8 +22,9 @@ namespace math { ...@@ -22,8 +22,9 @@ namespace math {
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class PadFunctor { class PadFunctor {
public: public:
void operator()(const framework::Tensor &input, const int pad_h, void operator()(const framework::Tensor &input, const int pad_top,
const int pad_w, framework::Tensor *output); const int pad_bottom, const int pad_left, const int pad_right,
framework::Tensor *output);
}; };
} // namespace math } // namespace math
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/math/winograd/winograd.h"
#include "operators/math/winograd/winograd_transform.h"
namespace paddle_mobile {
namespace operators {
namespace math {
// F(2X2, 3X3): winograd variant producing a 2x2 output tile per 3x3 kernel.
// TODO(review): intentionally left unimplemented in this commit — the body
// is an empty stub; callers must not dispatch to it until it is filled in.
void winograd_f2k3(const framework::Tensor &input,
                   const framework::Tensor &weight, framework::Tensor *output) {
}
// F(6X6, 3X3): winograd convolution that produces 6x6 output tiles from a
// 3x3 kernel via an 8x8 transform tile (tile = 8, kernel = 3).
void winograd_f6k3(const framework::Tensor &input,
                   const framework::Tensor &weight, framework::Tensor *output) {
  framework::Tensor input_in_winograd_domain;
  framework::Tensor weight_in_winograd_domain;
  // move the 3x3 kernels into the winograd domain
  winograd_transform_weight<8, 3>(weight, &weight_in_winograd_domain);
  // tile the padded input and move the tiles into the winograd domain
  winograd_transform_input<8, 3>(input, &input_in_winograd_domain);
  // element-wise multiply in the transformed domain, then calculate the
  // output by applying the inverse transform
  winograd_transform_output<8, 3>(input_in_winograd_domain,
                                  weight_in_winograd_domain, output);
}
// F(4X4, 5X5): winograd variant producing a 4x4 output tile per 5x5 kernel.
// TODO(review): intentionally left unimplemented in this commit — the body
// is an empty stub; callers must not dispatch to it until it is filled in.
void winograd_f4k5(const framework::Tensor &input,
                   const framework::Tensor &weight, framework::Tensor *output) {
}
} // namespace math
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// Header guard must come before the feature gate: if `#pragma once` sits
// inside `#ifdef CONV_OP`, the pragma is never processed in builds where
// CONV_OP is undefined, leaving the file unguarded against re-inclusion.
#pragma once

#ifdef CONV_OP

#include "framework/tensor.h"

namespace paddle_mobile {
namespace operators {
namespace math {

// Winograd convolution entry points, named F(m X m, r X r): m is the
// output tile size and r the kernel size. All tensors are float, NCHW.

// F(2X2, 3X3)
void winograd_f2k3(const framework::Tensor &input,
                   const framework::Tensor &weight, framework::Tensor *output);
// F(6X6, 3X3)
void winograd_f6k3(const framework::Tensor &input,
                   const framework::Tensor &weight, framework::Tensor *output);
// F(4X4, 5X5)
void winograd_f4k5(const framework::Tensor &input,
                   const framework::Tensor &weight, framework::Tensor *output);

}  // namespace math
}  // namespace operators
}  // namespace paddle_mobile

#endif  // CONV_OP
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// Header guard must come before the feature gate: if `#pragma once` sits
// inside `#ifdef CONV_OP`, the pragma is never processed in builds where
// CONV_OP is undefined, leaving the file unguarded against re-inclusion.
#pragma once

#ifdef CONV_OP

#include "framework/tensor.h"

namespace paddle_mobile {
namespace operators {
namespace math {

// Winograd domain transforms, specialized per (tile, kernel) pair
// (e.g. <8, 3> for F(6x6, 3x3)). Definitions live in the corresponding
// architecture-specific translation units.

// Transforms convolution weights into the winograd domain.
template <int tile, int kernel>
void winograd_transform_weight(const framework::Tensor &weight,
                               framework::Tensor *output);

// Tiles the input feature map and transforms it into the winograd domain.
template <int tile, int kernel>
void winograd_transform_input(const framework::Tensor &input,
                              framework::Tensor *output);

// Multiplies transformed input and weight, then applies the inverse
// transform to produce the output feature map.
template <int tile, int kernel>
void winograd_transform_output(const framework::Tensor &input,
                               const framework::Tensor &weight,
                               framework::Tensor *output);

}  // namespace math
}  // namespace operators
}  // namespace paddle_mobile

#endif  // CONV_OP
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册