提交 704c1148 编写于 作者: H hjchen2

Refine: use im2col+gemm to implement the int8 3x3 and 5x5 convolutions, which gives higher performance

上级 492d60a5
...@@ -120,10 +120,15 @@ inline void ConvCompute_int8(const ConvParam<CPU> &param) { ...@@ -120,10 +120,15 @@ inline void ConvCompute_int8(const ConvParam<CPU> &param) {
typedef void (*ConvFunc)(const Tensor &input, const Tensor &kernel, typedef void (*ConvFunc)(const Tensor &input, const Tensor &kernel,
Tensor *output); Tensor *output);
static ConvFunc conv_funcs_table[7][5] = { static ConvFunc conv_funcs_table[7][5] = {
{0, 0, 0, 0, 0}, // k = 1 {0, 0, 0, 0, 0}, // k = 1
{0, 0, 0, 0, 0}, {conv3x3s1_int8, 0, 0, 0, 0}, // k = 3 // {0, 0, 0, 0, 0}, {conv3x3s1_int8, 0, 0, 0, 0}, // k = 3
{0, 0, 0, 0, 0}, {conv5x5s1_int8, 0, 0, 0, 0}, // k = 5 // {0, 0, 0, 0, 0}, {conv5x5s1_int8, 0, 0, 0, 0}, // k = 5
{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, // k = 7 {0, 0, 0, 0, 0},
{0, 0, 0, 0, 0}, // k = 3
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0}, // k = 5
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0}, // k = 7
}; };
const Tensor *input = param.Input(); const Tensor *input = param.Input();
Tensor *filter = param.Filter(); Tensor *filter = param.Filter();
......
...@@ -24,10 +24,6 @@ namespace operators { ...@@ -24,10 +24,6 @@ namespace operators {
void conv3x3s1_int8(const framework::Tensor& input, void conv3x3s1_int8(const framework::Tensor& input,
const framework::Tensor& weight, framework::Tensor* output); const framework::Tensor& weight, framework::Tensor* output);
void conv3x3s1_int8_4c(const framework::Tensor& input,
const framework::Tensor& weight,
framework::Tensor* output);
void conv5x5s1_int8(const framework::Tensor& input, void conv5x5s1_int8(const framework::Tensor& input,
const framework::Tensor& weight, framework::Tensor* output); const framework::Tensor& weight, framework::Tensor* output);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册