未验证 提交 eb0a08ea 编写于 作者: S shitouren1994 提交者: GitHub

1. fix fc gemv_1x2 bug 2. fix benchmark error 3. add int8 im2col+gemm (#478)


* 1.fix x86 benchmark bug 2.add int8 im2col+gemm
Co-authored-by: shitouren1994 <shihebing@bigo.sg>
上级 0a5a9637
......@@ -97,6 +97,15 @@ static int prerun(struct node_ops* node_ops, struct exec_node* exec_node, struct
}
}
#endif
/* int8 path: hand weight/buffer pre-processing to the int8 im2col+gemm
   kernel. int8_conv_hcl_prerun is a weak symbol (declared later in this
   change); a negative return is treated as a hard failure: log, set
   errno, abort node setup. */
else if (exec_graph->mode == TENGINE_MODE_INT8)
{
if (int8_conv_hcl_prerun(input_tensor,filter_tensor,output_tensor,conv_priv_info,conv_param) < 0)
{
TLOG_ERR("hcl conv hybrid int8 prerun failed\n");
set_tengine_errno(EFAULT);
return -1;
}
}
else
{
printf("Tengine work node not support %d\n", exec_graph->mode);
......@@ -151,6 +160,15 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
}
}
#endif
/* int8 path: dispatch the forward pass to the int8 im2col+gemm kernel.
   Thread count and CPU affinity are forwarded so the kernel can
   parallelize; any negative return aborts the node run. */
else if (exec_graph->mode == TENGINE_MODE_INT8)
{
if (int8_conv_hcl_run(input_tensor, weight_tensor, bias_tensor, output_tensor, conv_priv_info, conv_param, num_thread, cpu_affinity) < 0)
{
TLOG_ERR("hcl conv int8 run failed\n");
set_tengine_errno(EFAULT);
return -1;
}
}
else
{
printf("Tengine work node not support %d\n", exec_graph->mode);
......@@ -191,6 +209,15 @@ static int postrun(struct node_ops* node_ops, struct exec_node* exec_node, struc
}
}
#endif
/* int8 path: release resources acquired by int8_conv_hcl_prerun.
   A negative return is logged and reported as EFAULT, matching the
   error handling of the prerun/run branches. */
else if (exec_graph->mode == TENGINE_MODE_INT8)
{
    if (int8_conv_hcl_postrun(conv_priv_info) < 0)
    {
        /* BUGFIX: the message previously said "fp16" — a copy-paste
           left-over from the fp16 branch — inside the int8 branch. */
        TLOG_ERR("hcl conv int8 postrun failed\n");
        set_tengine_errno(EFAULT);
        return -1;
    }
}
else
{
printf("Tengine work node not support %d\n", exec_graph->mode);
......@@ -236,6 +263,10 @@ static int init_node(struct node_ops* node_ops, struct exec_node* exec_node, str
exec_node->shared_mem_size = fp16_conv_hcl_get_shared_mem_size(input_tensor, output_tensor, conv_param);
}
#endif
/* int8 path: ask the int8 kernel how much shared scratch memory the
   im2col+gemm lowering needs for these tensor shapes, and record it on
   the exec node so the executor can provision it before run(). */
else if (exec_graph->mode == TENGINE_MODE_INT8)
{
exec_node->shared_mem_size = int8_conv_hcl_get_shared_mem_size(input_tensor,output_tensor,conv_param);
}
else
{
printf("Tengine work node not support %d\n", exec_graph->mode);
......@@ -266,6 +297,8 @@ static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struc
int kernel_w = param->kernel_w;
int in_c = input_tensor->dims[1] / group;
int out_c = output_tensor->dims[1] / group;
if (input_tensor->data_type == TENGINE_DT_INT8)
return OPS_SCORE_BEST;
/* todo support int8/fp16 */
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
......
......@@ -72,6 +72,17 @@ int conv_hcl_get_shared_pack4_mem_size(struct ir_tensor* input_tensor, struct ir
int conv_hcl_set_shared_mem(struct conv_priv_info* priv_info, void* mem, int mem_size) __attribute__((weak));
int conv_hcl_set_shared_pack4_mem(struct conv_priv_info* priv_info, void* mem, int mem_size) __attribute__((weak));
/* int8 im2col+gemm convolution kernel entry points.
 * Declared __attribute__((weak)) so this translation unit still links
 * when the int8 implementation is not compiled in; callers are gated on
 * the int8 execution mode (see the TENGINE_MODE_INT8 branches).
 *   prerun  - one-time weight/buffer preparation before inference
 *   run     - the forward pass (num_thread / cpu_affinity forwarded)
 *   postrun - releases what prerun allocated
 *   get_shared_mem_size - scratch bytes needed for the given shapes
 * All return < 0 on failure. */
int int8_conv_hcl_prerun(struct ir_tensor* input_tensor,
struct ir_tensor* filter_tensor,
struct ir_tensor* output_tensor,
struct conv_priv_info* priv_info,
struct conv_param* param) __attribute__((weak));
int int8_conv_hcl_postrun(struct conv_priv_info* info) __attribute__((weak));
int int8_conv_hcl_run(struct ir_tensor* input_tensor , struct ir_tensor* filter_tensor ,struct ir_tensor* bias_tensor , struct ir_tensor* output_tensor , struct conv_priv_info* conv_info ,struct conv_param* param, int num_thread, int cpu_affinity) __attribute__((weak));
int int8_conv_hcl_get_shared_mem_size(struct ir_tensor* input_tensor ,struct ir_tensor* output_tensor , struct conv_param* param) __attribute__((weak)) ;
/* fp16 */
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
int fp16_conv_hcl_prerun(struct ir_tensor* input_tensor,
......
此差异已折叠。
......@@ -109,7 +109,8 @@ void gemv_1x2_int8(const int32_t *biases, const float *scales, int8_t *inp, int8
int remainw = (kernel_size << 3) >> 3;
int8x8x2_t weight;
int8x8_t input;
int16x8_t out_16_0, out_16_1, out_32_0, out_32_1;
int16x8_t out_16_0, out_16_1;
int32x4_t out_32_0, out_32_1;
int32_t sum0 = 0, sum1 = 0;
for (int i = 0; i < remainw; i = i + 8) {
weight = vld2_s8(weight_ptr);
......
......@@ -146,9 +146,7 @@ static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struc
{
if (pad_h0 == 0 && pad_h1 == 0 && (pool_size == POOL_K2S2 || pool_size == POOL_K3S2))
return OPS_SCORE_BEST;
if (pad_h0 == 1 && pad_h1 == 1 && (pool_size == POOL_K2S2 || pool_size == POOL_K3S1))
return OPS_SCORE_BEST;
else if (pad_h0 == 0 && pad_h1 == 1 && (pool_size == POOL_K3S2))
if (pad_h0 == 1 && pad_h1 == 1 && (pool_size == POOL_K2S2 || pool_size == POOL_K3S2))
return OPS_SCORE_BEST;
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册