Unverified commit fa40eaa4 authored by bzhang5, committed by GitHub

Tengine lite (#660)

* support eltwise, reduction sum over 5 dims, and fix pad

* fix compile error caused by reduction operator

* add stdio.h
Co-authored-by: bzhang5 <bzhang@openailab.com>
Parent 401f7534
......@@ -768,7 +768,8 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
input_tensor1 = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[1]);
input1 = input_tensor1->data;
input1_count4 = input_tensor1->elem_num;
input_hw_1 = input_tensor1->dims[2]*input_tensor1->dims[3];
int dim1_size = input_tensor1->dim_num;
input_hw_1 = input_tensor1->dims[dim1_size-2]*input_tensor1->dims[dim1_size-1];
}
if (!input_tensor1 || input_tensor0->elem_num >= input_tensor1->elem_num)
......@@ -776,11 +777,14 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
int input_chan_0 = 0;
int input_hw_0 = 0;
int input0_count4 = input_tensor0->elem_num;
int dim0_size = input_tensor0->dim_num;
if (layout == TENGINE_LAYOUT_NCHW)
{
input_chan_0 = input_tensor0->dims[1];
input_hw_0 = input_tensor0->dims[2] * input_tensor0->dims[3];
input_chan_0 = input_tensor0->dims[dim0_size-3];
if(input_tensor0->dims[dim0_size-4]){
input_chan_0 *= input_tensor0->dims[dim0_size-4];
}
input_hw_0 = input_tensor0->dims[dim0_size-2] * input_tensor0->dims[dim0_size-1];
}
else if (layout == TENGINE_LAYOUT_NHWC)
{
......@@ -792,7 +796,6 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
TLOG_ERR("unknown graph layout: %d\n", ir_graph->graph_layout);
return -1;
}
int ret = -1;
if (input_tensor0->data_type == TENGINE_DT_FP32)
ret = ref_eltwise_fp32(output, input0, input1, eltwise_param->type, input0_count4, input_chan_0, input_hw_0,
......
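A minimal sketch of the trailing-dims indexing the eltwise hunk above switches to, for an NCHW-style tensor of arbitrary rank: the H*W count comes from the last two dims and the channel count from the dim just before them, optionally folded with the next leading dim. The helper and variable names below are illustrative, not Tengine API, and the dim_num guard is an assumption of this sketch.

#include <stdio.h>

/* Illustrative helper (not Tengine API): derive the per-channel element
 * count (H*W) and the channel count from the trailing dims of a rank-N
 * NCHW-style tensor, instead of hard-coding dims[2]*dims[3]. */
static void trailing_chw(const int* dims, int dim_num, int* chan, int* hw)
{
    *hw = dims[dim_num - 2] * dims[dim_num - 1];   /* last two dims are H, W  */
    *chan = dims[dim_num - 3];                     /* dim just before H       */
    if (dim_num >= 4 && dims[dim_num - 4] > 0)     /* fold one more leading   */
        *chan *= dims[dim_num - 4];                /* dim when it is present  */
}

int main(void)
{
    int dims5[5] = {1, 2, 3, 4, 5};                /* a 5-D shape             */
    int chan = 0, hw = 0;
    trailing_chw(dims5, 5, &chan, &hw);
    printf("chan=%d hw=%d\n", chan, hw);           /* prints chan=6 hw=20     */
    return 0;
}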
......@@ -52,7 +52,6 @@ static void ref_pad_fp32(float* input, float* output, int in_h, int in_w, int ou
{
float* ptr = input;
float* outptr = output;
int y = 0;
// fill top
for (; y < top; y++)
......@@ -83,10 +82,11 @@ static void ref_pad_fp32(float* input, float* output, int in_h, int in_w, int ou
{
// memcpy(outptr + left, ptr, in_w * sizeof(float));
// x += in_w;
for (; x < in_w; x++)
for (x = 0; x < in_w; x++)
{
outptr[left + x] = ptr[x];
}
x++;
}
for (; x < out_w; x++)
{
......
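The pad hunk's change is in the per-row copy loop, which now restarts x from zero for each input row. Below is a standalone sketch of that row loop, left/right padding only; the top/bottom fill and the rest of the Tengine pad handling are omitted, and the function name and test values in main are illustrative.

#include <stdio.h>

/* Minimal sketch of row-wise constant padding (assumed semantics of the pad
 * hunk above): each output row gets `left` pad values, the copied input row,
 * then pad values up to out_w; the column index x restarts for every row. */
static void pad_rows_fp32(const float* in, float* out, int in_h, int in_w,
                          int out_w, int left, float v)
{
    for (int y = 0; y < in_h; y++)
    {
        const float* ptr = in + y * in_w;
        float* outptr = out + y * out_w;
        int x = 0;
        for (; x < left; x++)                      /* fill left border   */
            outptr[x] = v;
        for (x = 0; x < in_w; x++)                 /* copy the input row */
            outptr[left + x] = ptr[x];
        for (x = left + in_w; x < out_w; x++)      /* fill right border  */
            outptr[x] = v;
    }
}

int main(void)
{
    float in[2] = {1.0f, 2.0f};
    float out[4] = {0};
    pad_rows_fp32(in, out, 1, 2, 4, 1, 0.0f);      /* expect: 0 1 2 0    */
    for (int i = 0; i < 4; i++)
        printf("%g ", out[i]);
    printf("\n");
    return 0;
}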
......@@ -28,11 +28,14 @@
#include <string.h>
#include <math.h>
#include <stdio.h>
#define FLOAT_MAX 3.4028235E38
#define FLOAT_MIN -3.4028235E38
void sum_5d_ax1(int* dims, int dim_num, float* data, float* tmp);
void sum_4d_ax0(int dim0, int dim1, int dim2, int dim3, float* data, float* tmp);
void sum_4d_ax1(int dim0, int dim1, int dim2, int dim3, float* data, float* tmp);
void sum_4d_ax2(int dim0, int dim1, int dim2, int dim3, float* data, float* tmp);
......@@ -140,21 +143,16 @@ struct reduce_param_ref
};
static int ref_reduce_fp32(float* data, float* out_data, int dim0, int dim1, int dim2, int dim3, int out_size,
struct reduce_param_ref* param)
struct reduce_param_ref* param, int dim_num, int* dims)
{
int offset = 0;
// TLOG_ERR("out_size:%d\n",out_size);
// TLOG_ERR("dim0:%d,dim1:%d,dim2:%d,dim3:%d\n",dim0,dim1,dim2,dim3);
float* tmp = ( float* )sys_malloc(sizeof(float) * out_size);
// TLOG_ERR("tmp:%p\n",tmp);
memset(tmp, 0, sizeof(float) * out_size);
int param_dim0 = param->param_dim[0];
int param_dim1 = param->param_dim[1];
int param_dim2 = param->param_dim[2];
int param_dim3 = param->param_dim[3];
// TLOG_ERR("param_dim0:%d,param_dim1:%d,param_dim2:%d,param_dim3:%d\n",param_dim0,param_dim1,param_dim2,param_dim3);
// TLOG_ERR("%d, %d, %d, %d, %d\n",param_dim0, param_dim1, param_dim2, param_dim3, param->type);
// reduce sum
if (param->type == 0)
{
if ((param_dim0 == -2 && param_dim1 == -2 && param_dim2 == -2 && param_dim3 == -2) ||
......@@ -176,12 +174,19 @@ static int ref_reduce_fp32(float* data, float* out_data, int dim0, int dim1, int
}
}
}
else if(param_dim0 == 1 && param_dim1 == -2 && param_dim2 == -2 && param_dim3 == -2 && (dim_num > 4))
{
if(dim_num == 5){
sum_5d_ax1(dims, dim_num, data, tmp);
}
}
else if (param_dim0 == 0 && param_dim1 == -2 && param_dim2 == -2 && param_dim3 == -2)
{
sum_4d_ax0(dim0, dim1, dim2, dim3, data, tmp);
}
else if (param_dim0 == 1 && param_dim1 == -2 && param_dim2 == -2 && param_dim3 == -2)
else if (param_dim0 == 1 && param_dim1 == -2 && param_dim2 == -2 && param_dim3 == -2 && (dim_num <= 4) )
{
fprintf(stderr, "wrong dim_num %d \n", dim_num);
sum_4d_ax1(dim0, dim1, dim2, dim3, data, tmp);
}
else if (param_dim0 == 2 && param_dim1 == -2 && param_dim2 == -2 && param_dim3 == -2)
......@@ -2393,6 +2398,23 @@ void mean_2d_ax1(int dim1, int dim2, float* tmp, float* tmp_1)
}
// sum
void sum_5d_ax1(int* dims, int dim_num, float* data, float* tmp)
{
int dim0 = dims[0];
int dim1 = dims[1];
int dim2 = dims[2];
int dim3 = dims[3];
int dim4 = dims[4];
int chw = dim2*dim3*dim4;
for(int j = 0; j < dim0; j++){
for(int n = 0; n < dim1; n++){
for(int size = 0; size < chw; size++){
tmp[size] += data[n*chw + size];
}
}
}
}
void sum_4d_ax0(int dim0, int dim1, int dim2, int dim3, float* data, float* tmp)
{
for (int j = 0; j < dim1 * dim2 * dim3; j++)
......
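The sum_5d_ax1 added above collapses axis 1 of a 5-D input into a buffer holding the trailing three dims. For reference, a self-contained sketch of a reduce-sum over axis 1 of a d0 x d1 x d2 x d3 x d4 tensor, with the leading-dim offset written out explicitly so the output keeps shape d0 x d2 x d3 x d4; the function name and the values in main are illustrative only.

#include <stdio.h>
#include <string.h>

/* Sketch: reduce-sum over axis 1 of a row-major 5-D tensor.
 * out has dims[0]*dims[2]*dims[3]*dims[4] elements. */
static void reduce_sum_axis1_5d(const int dims[5], const float* data, float* out)
{
    int chw = dims[2] * dims[3] * dims[4];
    memset(out, 0, sizeof(float) * dims[0] * chw);
    for (int b = 0; b < dims[0]; b++)
        for (int n = 0; n < dims[1]; n++)
            for (int i = 0; i < chw; i++)
                out[b * chw + i] += data[(b * dims[1] + n) * chw + i];
}

int main(void)
{
    int dims[5] = {1, 2, 2, 2, 2};                 /* 1x2x2x2x2 input  */
    float data[16], out[8];
    for (int i = 0; i < 16; i++) data[i] = (float)i;
    reduce_sum_axis1_5d(dims, data, out);
    printf("%g %g\n", out[0], out[7]);             /* 0+8=8, 7+15=22   */
    return 0;
}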
......@@ -72,8 +72,8 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
}
int element_size = output_tensor->elem_size;
int dims[4] = {1, 1, 1, 1};
// int dims[4] = {1, 1, 1, 1};
int* dims = (int*)malloc(input_tensor->dim_num*sizeof(int));
for (int i = 0; i < input_tensor->dim_num; i++)
{
dims[i] = input_tensor->dims[i];
......@@ -82,15 +82,18 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
int dim1 = dims[1];
int dim2 = dims[2];
int dim3 = dims[3];
param.param_dim[0] = reduction_param->dim_0;
param.param_dim[1] = reduction_param->dim_1;
param.param_dim[2] = reduction_param->dim_2;
param.param_dim[3] = reduction_param->dim_3;
param.type = reduction_param->type;
int in_dim_num = input_tensor->dim_num;
// printf("input dims: %d \n", input_tensor->dim_num);
int ret = ref_reduce_fp32(( float* )input_tensor->data, ( float* )output_tensor->data, dim0, dim1, dim2, dim3,
out_tensor_size, &param);
out_tensor_size, &param, in_dim_num, dims);
free(dims);
if (ret < 0)
return -1;
else
......
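In the caller hunk above, dims is now sized from the tensor's dim_num at runtime rather than fixed at four, and freed after the reduce kernel returns. A small sketch of that copy pattern; tensor_dims and tensor_dim_num stand in for the ir_tensor fields, and the NULL check is an assumption of this sketch.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Sketch of the caller-side pattern: heap-allocate a dims array sized from
 * the tensor's dim_num and copy the shape into it before calling the reduce
 * kernel. The caller frees it afterwards. */
static int* copy_dims(const int* tensor_dims, int tensor_dim_num)
{
    int* dims = (int*)malloc((size_t)tensor_dim_num * sizeof(int));
    if (!dims)
        return NULL;
    memcpy(dims, tensor_dims, (size_t)tensor_dim_num * sizeof(int));
    return dims;
}

int main(void)
{
    int tensor_dims[5] = {1, 2, 3, 4, 5};
    int* dims = copy_dims(tensor_dims, 5);
    if (dims)
    {
        printf("%d %d %d %d %d\n", dims[0], dims[1], dims[2], dims[3], dims[4]);
        free(dims);
    }
    return 0;
}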