diff --git a/source/device/cpu/op/layernorm/layernorm_ref.c b/source/device/cpu/op/layernorm/layernorm_ref.c
new file mode 100644
index 0000000000000000000000000000000000000000..1a90e705ec6e90141c7bde15f74e54e7c3195800
--- /dev/null
+++ b/source/device/cpu/op/layernorm/layernorm_ref.c
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author: Shijie Chen
+ */
+
+#include "layernorm_param.h"
+
+#include "graph/tensor.h"
+#include "graph/node.h"
+#include "graph/graph.h"
+#include "utility/sys_port.h"
+#include "utility/float.h"
+#include "utility/log.h"
+#include "device/cpu/cpu_node.h"
+#include "device/cpu/cpu_graph.h"
+#include "device/cpu/cpu_module.h"
+
+#include <math.h>
+
+static int init_node(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    return 0;
+}
+
+static int release_node(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    return 0;
+}
+
+static int prerun(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    return 0;
+}
+
+/* y = (x - mean) / sqrt(var + eps) * gamma + beta, computed per normalization
+ * group in one pass: var = E[x^2] - E[x]^2, then the affine form y = x * a + b
+ * with a = 1/sqrt(var + eps) and b = -mean * a. */
+static int ref_layernorm_fp32(struct tensor* input_tensor, struct tensor* output_tensor,
+                              struct tensor* gamma_tensor, struct tensor* beta_tensor, float eps)
+{
+#if 1
+    // TIM-VX convention: normalize over the last dimension only
+    int norm_size = input_tensor->dims[input_tensor->dim_num - 1];
+    int count = 1;
+    for (int i = 0; i < input_tensor->dim_num - 1; i++)
+    {
+        count *= input_tensor->dims[i];
+    }
+#else
+    // PyTorch convention: the normalized extent is taken from gamma's shape
+    int norm_size = gamma_tensor->elem_num;
+    int count = input_tensor->elem_num / gamma_tensor->elem_num;
+#endif
+
+    const float* input_data = (const float*)input_tensor->data;
+    float* output_data = (float*)output_tensor->data;
+
+    const float* gamma_data = (const float*)gamma_tensor->data;
+    const float* beta_data = (const float*)beta_tensor->data;
+
+    for (int i = 0; i < count; i++)
+    {
+        float sum = 0.f;
+        float sqsum = 0.f;
+        for (int j = 0; j < norm_size; j++)
+        {
+            float x = input_data[i * norm_size + j];
+            sum += x;
+            sqsum += x * x;
+        }
+        float mean = sum / norm_size;
+        float var = sqsum / norm_size - mean * mean;
+        float a = 1.0f / sqrtf(var + eps);
+        float b = -mean * a;
+        for (int j = 0; j < norm_size; j++)
+        {
+            int offset = i * norm_size + j;
+            output_data[offset] = (input_data[offset] * a + b) * gamma_data[j] + beta_data[j];
+        }
+    }
+
+    return 0;
+}
+
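+/* The uint8 path reuses the fp32 math: dequantize the whole tensor with
+ * real = (q - zero_point) * scale, normalize in float, then requantize the
+ * result with round-to-nearest and clamp it to [0, 255]. */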
+static int ref_layernorm_uint8(struct tensor* input_tensor, struct tensor* output_tensor,
+                               struct tensor* gamma_tensor, struct tensor* beta_tensor, float eps)
+{
+#if 1
+    // TIM-VX convention: normalize over the last dimension only
+    int norm_size = input_tensor->dims[input_tensor->dim_num - 1];
+    int count = 1;
+    for (int i = 0; i < input_tensor->dim_num - 1; i++)
+    {
+        count *= input_tensor->dims[i];
+    }
+#else
+    // PyTorch convention: the normalized extent is taken from gamma's shape
+    int norm_size = gamma_tensor->elem_num;
+    int count = input_tensor->elem_num / gamma_tensor->elem_num;
+#endif
+
+    int total_size = input_tensor->elem_num;
+    float* input_data = (float*)sys_malloc(total_size * sizeof(float));
+    float* output_data = (float*)sys_malloc(total_size * sizeof(float));
+
+    // dequant
+    {
+        const uint8_t* input_uint8 = (const uint8_t*)input_tensor->data;
+        float input_scale = input_tensor->scale;
+        int input_zero = input_tensor->zero_point;
+
+        for (int i = 0; i < total_size; i++)
+            input_data[i] = ((float)input_uint8[i] - (float)input_zero) * input_scale;
+    }
+
+    const float* gamma_data = (const float*)gamma_tensor->data;
+    const float* beta_data = (const float*)beta_tensor->data;
+
+    for (int i = 0; i < count; i++)
+    {
+        float sum = 0.f;
+        float sqsum = 0.f;
+        for (int j = 0; j < norm_size; j++)
+        {
+            float x = input_data[i * norm_size + j];
+            sum += x;
+            sqsum += x * x;
+        }
+        float mean = sum / norm_size;
+        float var = sqsum / norm_size - mean * mean;
+        float a = 1.0f / sqrtf(var + eps);
+        float b = -mean * a;
+        for (int j = 0; j < norm_size; j++)
+        {
+            int offset = i * norm_size + j;
+            output_data[offset] = (input_data[offset] * a + b) * gamma_data[j] + beta_data[j];
+        }
+    }
+
+    // quant
+    {
+        uint8_t* output_uint8 = (uint8_t*)output_tensor->data;
+        float output_scale = output_tensor->scale;
+        int output_zero = output_tensor->zero_point;
+        for (int i = 0; i < total_size; i++)
+        {
+            int udata = (int)roundf(output_data[i] / output_scale + output_zero);
+            if (udata > 255)
+                udata = 255;
+            else if (udata < 0)
+                udata = 0;
+            output_uint8[i] = udata;
+        }
+    }
+
+    sys_free(input_data);
+    sys_free(output_data);
+    return 0;
+}
+
+static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    struct node* node = exec_node->ir_node;
+    struct graph* graph = node->graph;
+
+    struct tensor* input_tensor = get_ir_graph_tensor(graph, node->input_tensors[0]);
+    struct tensor* gamma_tensor = get_ir_graph_tensor(graph, node->input_tensors[1]);
+    struct tensor* beta_tensor = get_ir_graph_tensor(graph, node->input_tensors[2]);
+
+    struct tensor* output_tensor = get_ir_graph_tensor(graph, node->output_tensors[0]);
+
+    struct layernorm_Param* param = (struct layernorm_Param*)node->op.param_mem;
+    float eps = param->eps;
+
+    int ret = -1;
+    if (input_tensor->data_type == TENGINE_DT_FP32)
+        ret = ref_layernorm_fp32(input_tensor, output_tensor, gamma_tensor, beta_tensor, eps);
+    else if (input_tensor->data_type == TENGINE_DT_UINT8)
+        ret = ref_layernorm_uint8(input_tensor, output_tensor, gamma_tensor, beta_tensor, eps);
+
+    return ret;
+}
+
+static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struct node* exec_node)
+{
+    return OPS_SCORE_BEST;
+}
+
+static struct node_ops hcl_node_ops = {.prerun = NULL,
+                                       .run = run,
+                                       .reshape = NULL,
+                                       .postrun = NULL,
+                                       .init_node = init_node,
+                                       .release_node = release_node,
+                                       .score = score};
+
+int register_layernorm_ref_op()
+{
+    return register_builtin_node_ops(OP_LAYERNORM, &hcl_node_ops);
+}
+
+int unregister_layernorm_ref_op()
+{
+    return unregister_builtin_node_ops(OP_LAYERNORM, &hcl_node_ops);
+}
diff --git a/source/device/tim-vx/op/timvx_gelu.cc b/source/device/tim-vx/op/timvx_gelu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..85a32e2f2535fbb59e3fccb0c889f91abfbb2b27
--- /dev/null
+++ b/source/device/tim-vx/op/timvx_gelu.cc
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, Open AI Lab
+ * Author: Shijie Chen
+ */
+
+#include "timvx_executor.hpp"
+
+extern "C"
+{
+#include "operator/op.h"
+}
+
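+// OP_GELU carries no parameters, so the node maps one-to-one onto the TIM-VX
+// built-in Gelu operation; only the input/output tensor bindings are wired up.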
+bool VXEngine::AddGeluNode(struct node* ir_node)
+{
+    struct graph* ir_graph = ir_node->graph;
+
+    struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
+    struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
+
+    auto gelu = graph->CreateOperation<tim::vx::ops::Gelu>();
+    (*gelu)
+        .BindInputs({ this->vx_tensor_map[input_tensor->index] })
+        .BindOutputs({ this->vx_tensor_map[output_tensor->index] });
+
+    return true;
+}
diff --git a/source/device/tim-vx/op/timvx_layernorm.cc b/source/device/tim-vx/op/timvx_layernorm.cc
new file mode 100644
index 0000000000000000000000000000000000000000..78dd73f79bc903d6a104bb759d2fd9a3d2df2144
--- /dev/null
+++ b/source/device/tim-vx/op/timvx_layernorm.cc
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, Open AI Lab
+ * Author: Shijie Chen
+ */
+
+#include "timvx_executor.hpp"
+
+extern "C"
+{
+#include "operator/op.h"
+#include "layernorm_param.h"
+}
+
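+// Tengine stores the inputs as (data, gamma, beta); the {0, 2, 1} remap below
+// hands them to TIM-VX as (data, beta, gamma), which appears to be the order
+// tim::vx::ops::LayerNormalization expects. The axis argument 0 refers to the
+// innermost dimension of TIM-VX's reversed layout, i.e. the same last
+// dimension the CPU reference kernel normalizes over.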
+bool VXEngine::AddLayerNormNode(struct node* ir_node)
+{
+    struct graph* ir_graph = ir_node->graph;
+
+    std::vector<std::shared_ptr<tim::vx::Tensor> > bn_in_tensor(ir_node->input_num);
+
+    int in_set[3] = {0, 2, 1};
+    for (int i = 0; i < ir_node->input_num; i++)
+    {
+        int idx = in_set[i];
+        struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[idx]);
+        bn_in_tensor[i] = this->vx_tensor_map[input_tensor->index];
+    }
+    struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
+
+    struct layernorm_Param* param = (struct layernorm_Param*)ir_node->op.param_mem;
+
+    auto layernorm = graph->CreateOperation<tim::vx::ops::LayerNormalization>(0, param->eps);
+    (*layernorm)
+        .BindInputs(bn_in_tensor)
+        .BindOutputs({ this->vx_tensor_map[output_tensor->index] });
+
+    return true;
+}
diff --git a/source/device/tim-vx/timvx_executor.cc b/source/device/tim-vx/timvx_executor.cc
index 7fc0c5da8fc5ff054fdcc9200225bb52f87b4a55..e7a9350c26fb328f2fcdfa8685616e2f90d0f121 100644
--- a/source/device/tim-vx/timvx_executor.cc
+++ b/source/device/tim-vx/timvx_executor.cc
@@ -365,6 +365,12 @@ int VXEngine::Build(struct subgraph* subgraph)
        case OP_L2NORMALIZATION:
            this->AddL2normalizationNode(ir_node);
            break;
+       case OP_GELU:
+           this->AddGeluNode(ir_node);
+           break;
+       case OP_LAYERNORM:
+           this->AddLayerNormNode(ir_node);
+           break;
        default:
            fprintf(stderr, "Tengine TIM-VX: Cannot support OP(%d).\n", ir_node->index);
            break;
diff --git a/source/device/tim-vx/timvx_executor.hpp b/source/device/tim-vx/timvx_executor.hpp
index b6a5222052f89dee15a2b1043a7d70488ed3b492..0be2ecf630487ae37fa725345dd3d63d45cfa7b7 100644
--- a/source/device/tim-vx/timvx_executor.hpp
+++ b/source/device/tim-vx/timvx_executor.hpp
@@ -79,6 +79,7 @@ extern "C" {
 #include "tim/vx/ops/transpose.h"
 #include "tim/vx/ops/spatial_transformer.h"
 #include "tim/vx/ops/l2normalization.h"
+#include "tim/vx/ops/layernormalization.h"
 
 #define SPEC_TYPE_CONV 1
 #define SPEC_TYPE_CONV_BIAS 2
@@ -145,6 +146,8 @@ private:
     bool AddUpsampleNode(struct node* ir_node);
     bool AddSpatialtransformerNode(struct node* ir_node);
     bool AddL2normalizationNode(struct node* ir_node);
+    bool AddGeluNode(struct node* ir_node);
+    bool AddLayerNormNode(struct node* ir_node);
 
 public:
     std::shared_ptr<tim::vx::Context> context;
diff --git a/source/device/tim-vx/timvx_limit.hpp b/source/device/tim-vx/timvx_limit.hpp
index c751393d33e9b2376b6d7546621d09eff9d026d5..5fbb0f564e1c9432fe6acbffaecd178c54288adb 100644
--- a/source/device/tim-vx/timvx_limit.hpp
+++ b/source/device/tim-vx/timvx_limit.hpp
@@ -131,5 +131,7 @@ const int timvx_supported_ops[] = {
     //    OP_WHERE,
     //    OP_SOFTPLUS,
     //    OP_RECIPROCAL,
+    OP_GELU,
+    OP_LAYERNORM,
     //    OP_BUILTIN_LAST
 };
diff --git a/source/operator/op.h b/source/operator/op.h
index d95828af68426ae023761a395a32d31550f1129e..dde05fbdf9a736b13ded38e559bee44fa53c869d 100644
--- a/source/operator/op.h
+++ b/source/operator/op.h
@@ -140,6 +140,7 @@ enum
     OP_SPATIALTRANSFORMER,
     OP_EXPAND,
     OP_GELU,
+    OP_LAYERNORM,
     OP_BUILTIN_LAST
 };
 
diff --git a/source/operator/op_name.h b/source/operator/op_name.h
index 29e238ead4726f280872c1c4d5856ecba2db20f3..83699e3bd01c6a012cba0546427b5cb2530cdb34 100644
--- a/source/operator/op_name.h
+++ b/source/operator/op_name.h
@@ -127,3 +127,4 @@
 #define OP_SPATIALTRANSFORMER_NAME "SpatialTransformer"
 #define OP_EXPAND_NAME "Expand"
 #define OP_GELU_NAME "Gelu"
+#define OP_LAYERNORM_NAME "LayerNorm"
\ No newline at end of file
diff --git a/source/operator/prototype/layernorm.c b/source/operator/prototype/layernorm.c
new file mode 100644
index 0000000000000000000000000000000000000000..32db15afb2a456ab8fc45163bd4dc7666f992e14
--- /dev/null
+++ b/source/operator/prototype/layernorm.c
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author: Shijie Chen
+ */
+
+#include "layernorm_param.h"
+
+#include "api/c_api.h"
+#include "graph/tensor.h"
+#include "graph/node.h"
+#include "graph/graph.h"
+#include "module/module.h"
+#include "utility/sys_port.h"
+#include "utility/log.h"
+
+#include <string.h>
+
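+/* LayerNorm is shape-preserving: the output tensor simply inherits the
+ * input's dims, whatever the normalized extent turns out to be at run time. */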
+static int infer_shape(struct node* node)
+{
+    struct graph* graph = node->graph;
+    struct tensor* input = get_ir_graph_tensor(graph, node->input_tensors[0]);
+    struct tensor* output = get_ir_graph_tensor(graph, node->output_tensors[0]);
+
+    set_ir_tensor_shape(output, input->dims, input->dim_num);
+
+    return 0;
+}
+
+static int init_op(struct op* op)
+{
+    struct layernorm_Param* param = (struct layernorm_Param*)sys_malloc(sizeof(struct layernorm_Param));
+
+    if (param == NULL)
+    {
+        return -1;
+    }
+
+    /* set the param default value */
+    memset(param, 0, sizeof(struct layernorm_Param));
+    op->param_mem = param;
+    op->param_size = sizeof(struct layernorm_Param);
+    op->same_shape = 0;
+    op->infer_shape = infer_shape;
+
+    return 0;
+}
+
+static void release_op(struct op* op)
+{
+    sys_free(op->param_mem);
+}
+
+int register_layernorm_op()
+{
+    struct method m;
+
+    m.version = 1;
+    m.init = init_op;
+    m.release = release_op;
+
+    return register_op(OP_LAYERNORM, OP_LAYERNORM_NAME, &m);
+}
+
+int unregister_layernorm_op()
+{
+    return unregister_op(OP_LAYERNORM, 1);
+}
diff --git a/source/operator/prototype/layernorm_param.h b/source/operator/prototype/layernorm_param.h
new file mode 100644
index 0000000000000000000000000000000000000000..116cc87426f5418580b0f08aed3de6b140aecbb0
--- /dev/null
+++ b/source/operator/prototype/layernorm_param.h
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author: Shijie Chen
+ */
+
+#ifndef __LAYERNORM_PARAM_H__
+#define __LAYERNORM_PARAM_H__
+
+struct layernorm_Param
+{
+    float eps; /* added to the variance for numerical stability */
+};
+
+#endif
diff --git a/source/serializer/tmfile/op/tm2_layernorm.c b/source/serializer/tmfile/op/tm2_layernorm.c
new file mode 100644
index 0000000000000000000000000000000000000000..4645e8405e187d59427faa8a2b39f9251f35086a
--- /dev/null
+++ b/source/serializer/tmfile/op/tm2_layernorm.c
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author: Shijie Chen
+ */
+
+#include "layernorm_param.h"
+
+#include "graph/tensor.h"
+#include "graph/node.h"
+#include "graph/graph.h"
+#include "graph/subgraph.h"
+#include "module/module.h"
+#include "serializer/serializer.h"
+#include "tmfile/tm2_serializer.h"
+#include "device/device.h"
+#include "utility/log.h"
+
+static int layernorm_op_map(int op)
+{
+    return OP_LAYERNORM;
+}
+
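+/* The tmfile side is minimal: tm_op->offset_t_param locates a
+ * TM2_LayerNormParam blob relative to the serialized model's base address,
+ * and eps is its only field. */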
+static int tm2_load_layernorm(struct graph* ir_graph, struct node* ir_node, const TM2_Node* tm_node,
+                              const TM2_Operator* tm_op)
+{
+    struct layernorm_Param* layernorm_param = (struct layernorm_Param*)ir_node->op.param_mem;
+    const struct tm2_priv* tm2_priv = (struct tm2_priv*)ir_graph->serializer_privacy;
+    const char* mem_base = tm2_priv->base;
+    const TM2_LayerNormParam* tm_param = (TM2_LayerNormParam*)(mem_base + tm_op->offset_t_param);
+
+    layernorm_param->eps = tm_param->eps;
+
+    return 0;
+}
+
+int register_tm2_layernorm_op()
+{
+    struct serializer* tm2_s = find_serializer_via_name("tengine");
+
+    if (tm2_s == NULL)
+    {
+        TLOG_ERR("tengine serializer has not been registered yet\n");
+        return -1;
+    }
+
+    tm2_s->register_op_loader(tm2_s, TM2_OPTYPE_LAYERNORM, 1, tm2_load_layernorm, layernorm_op_map, NULL);
+
+    return 0;
+}
+
+int unregister_tm2_layernorm_op()
+{
+    struct serializer* tm2_s = find_serializer_via_name("tengine");
+
+    tm2_s->unregister_op_loader(tm2_s, TM2_OPTYPE_LAYERNORM, 1, tm2_load_layernorm);
+
+    return 0;
+}
diff --git a/source/serializer/tmfile/tm2_format.h b/source/serializer/tmfile/tm2_format.h
index 7211c9df18a564904e67f602808de7e04ad916f4..5abbfc1535bf88da8b3ec447113f791754f9789a 100644
--- a/source/serializer/tmfile/tm2_format.h
+++ b/source/serializer/tmfile/tm2_format.h
@@ -151,6 +151,8 @@ typedef uint8_t tm_bool_t; /* bool is 1-byte unsigned integer */
 #define TM2_OPSTR_SPATIALTRANSFORMER "SpatialTransformer"
 #define TM2_OPSTR_EXPAND "Expand"
 #define TM2_OPSTR_GELU "Gelu"
+#define TM2_OPSTR_LAYERNORM "LayerNorm"
+
 /* Operator types */
 #define TM2_OPTYPE_ACCURACY 0 /* No Param */
 #define TM2_OPTYPE_BATCHNORMALIZATION 1 /* TM2_BatchNormParam */
@@ -258,7 +260,8 @@ typedef uint8_t tm_bool_t; /* bool is 1-byte unsigned integer */
 #define TM2_OPTYPE_RECIPROCAL 103
 #define TM2_OPTYPE_SPATIALTRANSFORMER 105
 #define TM2_OPTYPE_GELU 106
-#define TM2_OPTYPE_NUM 107
+#define TM2_OPTYPE_LAYERNORM 107
+#define TM2_OPTYPE_NUM 108
 
 /* -------------------------------- TM objects -------------------------------- */
 typedef struct
@@ -1006,6 +1009,11 @@ typedef struct
     int dim_num;
 } TM2_ExpandParam;
 
+typedef struct
+{
+    float eps;
+} TM2_LayerNormParam;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/tools/convert_tool/onnx/onnx2tengine.cpp b/tools/convert_tool/onnx/onnx2tengine.cpp
index f898dd085186c95f1d9cb97576bca0a78867bc18..29de06c4c5973f7fcdcfc7191a0bc1e1826153fb 100644
--- a/tools/convert_tool/onnx/onnx2tengine.cpp
+++ b/tools/convert_tool/onnx/onnx2tengine.cpp
@@ -2255,6 +2255,14 @@ static int load_gru(ir_graph_t* graph, ir_node_t* node, const onnx::NodeProto& onnx_node)
     return 0;
 }
 
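+// Exporters that emit a standalone "LayerNorm" node attach the stabilizer as
+// an "epsilon" attribute; fall back to the conventional 1e-5 when it is absent.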
+static int load_layer_norm(ir_graph_t* graph, ir_node_t* node, const onnx::NodeProto& onnx_node)
+{
+    struct layernorm_Param* layernorm_param = (struct layernorm_Param*)node->op.param_mem;
+    layernorm_param->eps = GetAttributeOrDefault<float>(onnx_node, "epsilon", 1e-5f);
+
+    return 0;
+}
+
 /*
  * OPERAOTR REGISTER FUNCTION DEFINE FOR ONNX SERIALIZER START
  */
@@ -2342,6 +2350,7 @@ void onnx_serializer::register_op_load()
     op_load_map["Unsqueeze"] = std::pair<int, op_load_t>(OP_UNSQUEEZE, load_unsqueeze);
     op_load_map["Where"] = std::pair<int, op_load_t>(OP_WHERE, load_no_param);
     op_load_map["Gelu"] = std::pair<int, op_load_t>(OP_GELU, load_no_param);
+    op_load_map["LayerNorm"] = std::pair<int, op_load_t>(OP_LAYERNORM, load_layer_norm);
 }
 /*
  * OPERATOR REGISTER FUNCTION DEFINE FOR ONNX SERIALIZER END
diff --git a/tools/save_graph/op_include.h b/tools/save_graph/op_include.h
index 1b1bb0d78d4b8de1da1a4704946af0309e962703..af0fabf49d0b28809a1d8a0e55f07399f0a3b178 100644
--- a/tools/save_graph/op_include.h
+++ b/tools/save_graph/op_include.h
@@ -103,6 +103,7 @@ extern "C" {
 #include "tile_param.h"
 #include "expand_param.h"
 #include "spatialtransformer_param.h"
+#include "layernorm_param.h"
 
 #ifdef __cplusplus
 }
diff --git a/tools/save_graph/tm2_op_save.cpp b/tools/save_graph/tm2_op_save.cpp
index 2c8a015a7529d9259ff0cc52bf7b78c86a13e163..c22604db7881fd8a64a32ce31867b96b7b793007 100644
--- a/tools/save_graph/tm2_op_save.cpp
+++ b/tools/save_graph/tm2_op_save.cpp
@@ -1422,6 +1422,23 @@ tm_uoffset_t SaveTmReciprocalOp(void* const start_ptr, tm_uoffset_t* cur_pos, ir_node_t* node)
     return WriteTmObject(start_ptr, cur_pos, &tm_op, sizeof(TM2_Operator));
 }
 
+tm_uoffset_t SaveTmGeluOp(void* const start_ptr, tm_uoffset_t* cur_pos, ir_node_t* node)
+{
+    TM2_Operator tm_op;
+    SetTmOperator(&tm_op, TM2_OPTYPE_GELU, TM2_NOT_SET);
+    return WriteTmObject(start_ptr, cur_pos, &tm_op, sizeof(TM2_Operator));
+}
+
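+// Unlike Gelu, LayerNorm carries a parameter block: serialize the
+// TM2_LayerNormParam first, then point the operator record at its offset.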
+tm_uoffset_t SaveTmLayerNormOp(void* const start_ptr, tm_uoffset_t* cur_pos, ir_node_t* node)
+{
+    struct layernorm_Param* p = (struct layernorm_Param*)node->op.param_mem;
+    TM2_LayerNormParam tm_param;
+    tm_param.eps = p->eps;
+    TM2_Operator tm_op;
+    SetTmOperator(&tm_op, TM2_OPTYPE_LAYERNORM, WriteTmObject(start_ptr, cur_pos, &tm_param, sizeof(TM2_LayerNormParam)));
+    return WriteTmObject(start_ptr, cur_pos, &tm_op, sizeof(TM2_Operator));
+}
+
 op_save_t SaveTmOpFunc(uint32_t op_type)
 {
     switch (op_type)
@@ -1606,6 +1623,10 @@ op_save_t SaveTmOpFunc(uint32_t op_type)
         return SaveTmMaximumOp;
     case OP_MINIMUM:
         return SaveTmMinimumOp;
+    case OP_GELU:
+        return SaveTmGeluOp;
+    case OP_LAYERNORM:
+        return SaveTmLayerNormOp;
     default:
         // fprintf(stderr, "Operator #%d not supported in tengine model yet\n", op_type);
         return nullptr;