提交 704c1148 编写于 作者: H hjchen2

Refine: use im2col+gemm to implement int8 3x3 and 5x5 conv for higher performance

上级 492d60a5
......@@ -120,10 +120,15 @@ inline void ConvCompute_int8(const ConvParam<CPU> &param) {
typedef void (*ConvFunc)(const Tensor &input, const Tensor &kernel,
Tensor *output);
static ConvFunc conv_funcs_table[7][5] = {
{0, 0, 0, 0, 0}, // k = 1
{0, 0, 0, 0, 0}, {conv3x3s1_int8, 0, 0, 0, 0}, // k = 3
{0, 0, 0, 0, 0}, {conv5x5s1_int8, 0, 0, 0, 0}, // k = 5
{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, // k = 7
{0, 0, 0, 0, 0}, // k = 1
// {0, 0, 0, 0, 0}, {conv3x3s1_int8, 0, 0, 0, 0}, // k = 3
// {0, 0, 0, 0, 0}, {conv5x5s1_int8, 0, 0, 0, 0}, // k = 5
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0}, // k = 3
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0}, // k = 5
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0}, // k = 7
};
const Tensor *input = param.Input();
Tensor *filter = param.Filter();
......
......@@ -24,10 +24,6 @@ namespace operators {
// Prototypes for the specialized int8 convolution kernels. Each takes a
// read-only input tensor, a read-only weight tensor, and a pointer to the
// output tensor.
// NOTE(review): the names suggest "<k>x<k>s1" means a k x k kernel with
// stride 1 -- confirm against the definitions.

// Presumably the int8 3x3 convolution kernel -- see the note above.
void conv3x3s1_int8(const framework::Tensor& input,
const framework::Tensor& weight, framework::Tensor* output);
// Variant of the 3x3 kernel; the meaning of the "_4c" suffix is not visible
// here -- TODO confirm against the definition.
void conv3x3s1_int8_4c(const framework::Tensor& input,
const framework::Tensor& weight,
framework::Tensor* output);
// Presumably the int8 5x5 convolution kernel -- see the note above.
void conv5x5s1_int8(const framework::Tensor& input,
const framework::Tensor& weight, framework::Tensor* output);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册