Unverified commit c73708ce authored by shijie001, committed by GitHub

Fixed GELU, added LayerNorm; added TIM-VX versions of GELU and LayerNorm (#1415)

* Fixed gelu save_graph error

Added SaveTmGeluOp()

* Added gelu timvx

* Added layernorm operator

* Added layernorm timvx
Parent cb3b6e6a
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, OPEN AI LAB
* Author: Shijie Chen
*/
#include "layernorm_param.h"
#include "graph/tensor.h"
#include "graph/node.h"
#include "graph/graph.h"
#include "utility/sys_port.h"
#include "utility/float.h"
#include "utility/log.h"
#include "device/cpu/cpu_node.h"
#include "device/cpu/cpu_graph.h"
#include "device/cpu/cpu_module.h"
#include <math.h>
static int init_node(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
{
return 0;
}
static int release_node(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
{
return 0;
}
static int prerun(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
{
return 0;
}
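/* Reference LayerNorm (fp32): each of `count` rows is normalized over its last
 * dimension of size `norm_size` as
 *     y = (x - mean) / sqrt(var + eps) * gamma + beta
 * with mean and var computed per row (var = E[x^2] - mean^2). The loop below
 * folds this into y = (x * a + b) * gamma + beta, where a = 1/sqrt(var + eps)
 * and b = -mean * a. */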
static int ref_layernorm_fp32(struct tensor* input_tensor, struct tensor* output_tensor,
struct tensor* gamma_tensor, struct tensor* beta_tensor, float eps)
{
#if 1
// TIM-VX
int norm_size = input_tensor->dims[input_tensor->dim_num - 1];
int count = 1;
for (int i = 0; i < input_tensor->dim_num - 1; i++)
{
count *= input_tensor->dims[i];
}
#else
// PyTorch
int norm_size = gamma_tensor->elem_num;
int count = input_tensor->elem_num / gamma_tensor->elem_num;
#endif
const float* input_data = (const float*)input_tensor->data;
float* output_data = (float*)output_tensor->data;
const float* gamma_data = (const float*)gamma_tensor->data;
const float* beta_data = (const float*)beta_tensor->data;
for (int i = 0; i < count; i++)
{
float sum = 0.f;
float sqsum = 0.f;
for (int j = 0; j < norm_size; j++)
{
float x = input_data[i * norm_size + j];
sum += x;
sqsum += x * x;
}
float mean = sum / norm_size;
float var = sqsum / norm_size - mean * mean;
float a = 1.0f / sqrtf(var + eps);
float b = -mean * a;
for (int j = 0; j < norm_size; j++)
{
int offset = i * norm_size + j;
output_data[offset] = (input_data[offset] * a + b) * gamma_data[j] + beta_data[j];
}
}
return 0;
}
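/* uint8 path: dequantize the whole input into a temporary fp32 buffer with
 * x = (q - zero_point) * scale, run the same fp32 LayerNorm as above, then
 * requantize the result with q = clamp(round(y / scale + zero_point), 0, 255). */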
static int ref_layernorm_uint8(struct tensor* input_tensor, struct tensor* output_tensor,
struct tensor* gamma_tensor, struct tensor* beta_tensor, float eps)
{
#if 1
// TIM-VX
int norm_size = input_tensor->dims[input_tensor->dim_num - 1];
int count = 1;
for (int i = 0; i < input_tensor->dim_num - 1; i++)
{
count *= input_tensor->dims[i];
}
#else
// PyTorch
int norm_size = gamma_tensor->elem_num;
int count = input_tensor->elem_num / gamma_tensor->elem_num;
#endif
int total_size = input_tensor->elem_num;
float* input_data = (float*)sys_malloc(total_size * sizeof(float));
float* output_data = (float*)sys_malloc(total_size * sizeof(float));
// dequant
{
const uint8_t* input_uint8 = (const uint8_t*)input_tensor->data;
float input_scale = input_tensor->scale;
int input_zero = input_tensor->zero_point;
for (int i = 0; i < total_size; i++)
input_data[i] = ((float)input_uint8[i] - (float)input_zero) * input_scale;
}
const float* gamma_data = (const float*)gamma_tensor->data;
const float* beta_data = (const float*)beta_tensor->data;
for (int i = 0; i < count; i++)
{
float sum = 0.f;
float sqsum = 0.f;
for (int j = 0; j < norm_size; j++)
{
float x = input_data[i * norm_size + j];
sum += x;
sqsum += x * x;
}
float mean = sum / norm_size;
float var = sqsum / norm_size - mean * mean;
float a = 1.0f / sqrtf(var + eps);
float b = -mean * a;
for (int j = 0; j < norm_size; j++)
{
int offset = i * norm_size + j;
output_data[offset] = (input_data[offset] * a + b) * gamma_data[j] + beta_data[j];
}
}
// quant
{
uint8_t* output_uint8 = (uint8_t*)output_tensor->data;
float output_scale = output_tensor->scale;
int output_zero = output_tensor->zero_point;
for (int i = 0; i < total_size; i++)
{
int udata = (int)roundf(output_data[i] / output_scale + output_zero);
if (udata > 255)
udata = 255;
else if (udata < 0)
udata = 0;
output_uint8[i] = udata;
}
}
sys_free(input_data);
sys_free(output_data);
return 0;
}
static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
{
struct node* node = exec_node->ir_node;
struct graph* graph = node->graph;
struct tensor* input_tensor = get_ir_graph_tensor(graph, node->input_tensors[0]);
struct tensor* gamma_tensor = get_ir_graph_tensor(graph, node->input_tensors[1]);
struct tensor* beta_tensor = get_ir_graph_tensor(graph, node->input_tensors[2]);
struct tensor* output_tensor = get_ir_graph_tensor(graph, node->output_tensors[0]);
struct layernorm_Param* param = (struct layernorm_Param*)node->op.param_mem;
float eps = param->eps;
int ret = -1;
if (input_tensor->data_type == TENGINE_DT_FP32)
ret = ref_layernorm_fp32(input_tensor, output_tensor, gamma_tensor, beta_tensor, eps);
else if (input_tensor->data_type == TENGINE_DT_UINT8)
ret = ref_layernorm_uint8(input_tensor, output_tensor, gamma_tensor, beta_tensor, eps);
return ret;
}
static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struct node* exec_node)
{
return OPS_SCORE_BEST;
}
static struct node_ops hcl_node_ops = {.prerun = NULL,
.run = run,
.reshape = NULL,
.postrun = NULL,
.init_node = init_node,
.release_node = release_node,
.score = score};
int register_layernorm_ref_op()
{
return register_builtin_node_ops(OP_LAYERNORM, &hcl_node_ops);
}
int unregister_layernorm_ref_op()
{
return unregister_builtin_node_ops(OP_LAYERNORM, &hcl_node_ops);
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: Shijie Chen
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "operator/op.h"
}
bool VXEngine::AddGeluNode(struct node* ir_node)
{
struct graph* ir_graph = ir_node->graph;
struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
auto gelu = graph->CreateOperation<tim::vx::ops::Gelu>();
(*gelu)
.BindInputs({ this->vx_tensor_map[input_tensor->index] })
.BindOutputs({ this->vx_tensor_map[output_tensor->index] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: Shijie Chen
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "operator/op.h"
#include "layernorm_param.h"
}
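// Note: Tengine stores the LayerNorm inputs as (input, gamma, beta); the
// in_set remap below feeds them to TIM-VX as (input, beta, gamma), which
// appears to be the order tim::vx::ops::LayerNormalization expects.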
bool VXEngine::AddLayerNormNode(struct node* ir_node)
{
struct graph* ir_graph = ir_node->graph;
std::vector<std::shared_ptr<tim::vx::Tensor> > bn_in_tensor(ir_node->input_num);
int in_set[3] = {0, 2, 1};
for (int i = 0; i < ir_node->input_num; i++)
{
int idx = in_set[i];
struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[idx]);
bn_in_tensor[i] = this->vx_tensor_map[input_tensor->index];
}
struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
struct layernorm_Param* param = (struct layernorm_Param*)ir_node->op.param_mem;
auto layernorm = graph->CreateOperation<tim::vx::ops::LayerNormalization>(0, param->eps);
(*layernorm)
.BindInputs({ bn_in_tensor })
.BindOutputs({ this->vx_tensor_map[output_tensor->index] });
return true;
}
@@ -365,6 +365,12 @@ int VXEngine::Build(struct subgraph* subgraph)
case OP_L2NORMALIZATION:
this->AddL2normalizationNode(ir_node);
break;
case OP_GELU:
this->AddGeluNode(ir_node);
break;
case OP_LAYERNORM:
this->AddLayerNormNode(ir_node);
break;
default:
fprintf(stderr, "Tengine TIM-VX: Cannot support OP(%d).\n", ir_node->index);
break;
......
@@ -79,6 +79,7 @@ extern "C" {
#include "tim/vx/ops/transpose.h"
#include "tim/vx/ops/spatial_transformer.h"
#include "tim/vx/ops/l2normalization.h"
#include "tim/vx/ops/layernormalization.h"
#define SPEC_TYPE_CONV 1
#define SPEC_TYPE_CONV_BIAS 2
@@ -145,6 +146,8 @@ private:
bool AddUpsampleNode(struct node* ir_node);
bool AddSpatialtransformerNode(struct node* ir_node);
bool AddL2normalizationNode(struct node* ir_node);
bool AddGeluNode(struct node* ir_node);
bool AddLayerNormNode(struct node* ir_node);
public:
std::shared_ptr<tim::vx::Context> context;
......
@@ -131,5 +131,7 @@ const int timvx_supported_ops[] = {
// OP_WHERE,
// OP_SOFTPLUS,
// OP_RECIPROCAL,
OP_GELU,
OP_LAYERNORM,
// OP_BUILTIN_LAST
};
@@ -140,6 +140,7 @@ enum
OP_SPATIALTRANSFORMER,
OP_EXPAND,
OP_GELU,
OP_LAYERNORM,
OP_BUILTIN_LAST
};
......
@@ -127,3 +127,4 @@
#define OP_SPATIALTRANSFORMER_NAME "SpatialTransformer"
#define OP_EXPAND_NAME "Expand"
#define OP_GELU_NAME "Gelu"
#define OP_LAYERNORM_NAME "LayerNorm"
\ No newline at end of file
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, OPEN AI LAB
* Author: Shijie Chen
*/
#include "layernorm_param.h"
#include "api/c_api.h"
#include "graph/tensor.h"
#include "graph/node.h"
#include "graph/graph.h"
#include "module/module.h"
#include "utility/sys_port.h"
#include "utility/log.h"
#include <string.h>
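/* LayerNorm is shape-preserving: infer_shape simply copies the input dims to
 * the output tensor. */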
static int infer_shape(struct node* node)
{
struct graph* graph = node->graph;
struct tensor* input = get_ir_graph_tensor(graph, node->input_tensors[0]);
struct tensor* output = get_ir_graph_tensor(graph, node->output_tensors[0]);
set_ir_tensor_shape(output, input->dims, input->dim_num);
return 0;
}
static int init_op(struct op* op)
{
struct layernorm_Param* param = (struct layernorm_Param*)sys_malloc(sizeof(struct layernorm_Param));
if (param == NULL)
{
return -1;
}
/*set the param default value */
memset(param, 0, sizeof(struct layernorm_Param));
op->param_mem = param;
op->param_size = sizeof(struct layernorm_Param);
op->same_shape = 0;
op->infer_shape = infer_shape;
return 0;
}
static void release_op(struct op* op)
{
sys_free(op->param_mem);
}
int register_layernorm_op()
{
struct method m;
m.version = 1;
m.init = init_op;
m.release = release_op;
return register_op(OP_LAYERNORM, OP_LAYERNORM_NAME, &m);
}
int unregister_layernorm_op()
{
return unregister_op(OP_LAYERNORM, 1);
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, OPEN AI LAB
* Author: Shijie Chen
*/
#ifndef __LAYERNORM_PARAM_H__
#define __LAYERNORM_PARAM_H__
struct layernorm_Param
{
float eps;
};
#endif
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, OPEN AI LAB
* Author: Shijie Chen
*/
#include "layernorm_param.h"
#include "graph/tensor.h"
#include "graph/node.h"
#include "graph/graph.h"
#include "graph/subgraph.h"
#include "module/module.h"
#include "serializer/serializer.h"
#include "tmfile/tm2_serializer.h"
#include "device/device.h"
#include "utility/log.h"
static int layernorm_op_map(int op)
{
return OP_LAYERNORM;
}
static int tm2_load_layernorm(struct graph* ir_graph, struct node* ir_node, const TM2_Node* tm_node,
const TM2_Operator* tm_op)
{
struct layernorm_Param* layernorm_param = (struct layernorm_Param*)ir_node->op.param_mem;
const struct tm2_priv* tm2_priv = (struct tm2_priv*)ir_graph->serializer_privacy;
const char* mem_base = tm2_priv->base;
const TM2_LayerNormParam* tm_param = (TM2_LayerNormParam*)(mem_base + tm_op->offset_t_param);
layernorm_param->eps = tm_param->eps;
return 0;
}
int register_tm2_layernorm_op()
{
struct serializer* tm2_s = find_serializer_via_name("tengine");
if (tm2_s == NULL)
{
TLOG_ERR("tengine serializer has not been registered yet\n");
return -1;
}
tm2_s->register_op_loader(tm2_s, TM2_OPTYPE_LAYERNORM, 1, tm2_load_layernorm, layernorm_op_map, NULL);
return 0;
}
int unregister_tm2_layernorm_op()
{
struct serializer* tm2_s = find_serializer_via_name("tengine");
tm2_s->unregister_op_loader(tm2_s, TM2_OPTYPE_LAYERNORM, 1, tm2_load_layernorm);
return 0;
}
@@ -151,6 +151,8 @@ typedef uint8_t tm_bool_t; /* bool is 1-byte unsigned integer */
#define TM2_OPSTR_SPATIALTRANSFORMER "SpatialTransformer"
#define TM2_OPSTR_EXPAND "Expand"
#define TM2_OPSTR_GELU "Gelu"
#define TM2_OPSTR_LAYERNORM "LayerNorm"
/* Operator types */
#define TM2_OPTYPE_ACCURACY 0 /* No Param */
#define TM2_OPTYPE_BATCHNORMALIZATION 1 /* TM2_BatchNormParam */
@@ -258,7 +260,8 @@ typedef uint8_t tm_bool_t; /* bool is 1-byte unsigned integer */
#define TM2_OPTYPE_RECIPROCAL 103
#define TM2_OPTYPE_SPATIALTRANSFORMER 105
#define TM2_OPTYPE_GELU 106
#define TM2_OPTYPE_NUM 107
#define TM2_OPTYPE_LAYERNORM 107
#define TM2_OPTYPE_NUM 108
/* --------------------- -------- TM objects -------------------------------- */
typedef struct
@@ -1006,6 +1009,11 @@ typedef struct
int dim_num;
} TM2_ExpandParam;
typedef struct
{
float eps;
} TM2_LayerNormParam;
#ifdef __cplusplus
}
#endif
......
@@ -2255,6 +2255,14 @@ static int load_gru(ir_graph_t* graph, ir_node_t* node, const onnx::NodeProto& onnx_node)
return 0;
}
static int load_layer_norm(ir_graph_t* graph, ir_node_t* node, const onnx::NodeProto& onnx_node)
{
struct layernorm_Param* layernorm_param = (struct layernorm_Param*)node->op.param_mem;
layernorm_param->eps = GetAttributeOrDefault<float>(onnx_node, "epsilon", 1e-5);
return 0;
}
/*
* OPERATOR REGISTER FUNCTION DEFINE FOR ONNX SERIALIZER START
*/
@@ -2342,6 +2350,7 @@ void onnx_serializer::register_op_load()
op_load_map["Unsqueeze"] = std::pair<int, op_load_t>(OP_UNSQUEEZE, load_unsqueeze);
op_load_map["Where"] = std::pair<int, op_load_t>(OP_WHERE, load_no_param);
op_load_map["Gelu"] = std::pair<int, op_load_t>(OP_GELU, load_no_param);
op_load_map["LayerNorm"] = std::pair<int, op_load_t>(OP_LAYERNORM, load_layer_norm);
}
/*
* OPERATOR REGISTER FUNCTION DEFINE FOR ONNX SERIALIZER END
......
@@ -103,6 +103,7 @@ extern "C" {
#include "tile_param.h"
#include "expand_param.h"
#include "spatialtransformer_param.h"
#include "layernorm_param.h"
#ifdef __cplusplus
}
......
@@ -1422,6 +1422,23 @@ tm_uoffset_t SaveTmReciprocalOp(void* const start_ptr, tm_uoffset_t* cur_pos, ir_node_t* node)
return WriteTmObject(start_ptr, cur_pos, &tm_op, sizeof(TM2_Operator));
}
tm_uoffset_t SaveTmGeluOp(void* const start_ptr, tm_uoffset_t* cur_pos, ir_node_t* node)
{
TM2_Operator tm_op;
SetTmOperator(&tm_op, TM2_OPTYPE_GELU, TM2_NOT_SET);
return WriteTmObject(start_ptr, cur_pos, &tm_op, sizeof(TM2_Operator));
}
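/* Unlike Gelu (which has no parameters and uses TM2_NOT_SET), LayerNorm writes
 * a TM2_LayerNormParam payload first and stores its offset in the TM2_Operator
 * record, matching how other parameterized ops are serialized. */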
tm_uoffset_t SaveTmLayerNormOp(void* const start_ptr, tm_uoffset_t* cur_pos, ir_node_t* node)
{
struct layernorm_Param* p = (struct layernorm_Param*)node->op.param_mem;
TM2_LayerNormParam tm_param;
tm_param.eps = p->eps;
TM2_Operator tm_op;
SetTmOperator(&tm_op, TM2_OPTYPE_LAYERNORM, WriteTmObject(start_ptr, cur_pos, &tm_param, sizeof(TM2_LayerNormParam)));
return WriteTmObject(start_ptr, cur_pos, &tm_op, sizeof(TM2_Operator));
}
op_save_t SaveTmOpFunc(uint32_t op_type)
{
switch (op_type)
@@ -1606,6 +1623,10 @@ op_save_t SaveTmOpFunc(uint32_t op_type)
return SaveTmMaximumOp;
case OP_MINIMUM:
return SaveTmMinimumOp;
case OP_GELU:
return SaveTmGeluOp;
case OP_LAYERNORM:
return SaveTmLayerNormOp;
default:
// fprintf(stderr, "Operator #%d not supported in tengine model yet\n", op_type);
return nullptr;
......