diff --git a/source/device/cpu/op/layernorm/layernorm_ref.c b/source/device/cpu/op/layernorm/layernorm_ref.c
new file mode 100644
index 0000000000000000000000000000000000000000..1a90e705ec6e90141c7bde15f74e54e7c3195800
--- /dev/null
+++ b/source/device/cpu/op/layernorm/layernorm_ref.c
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author: Shijie Chen
+ */
+
+#include "layernorm_param.h"
+
+#include "graph/tensor.h"
+#include "graph/node.h"
+#include "graph/graph.h"
+#include "utility/sys_port.h"
+#include "utility/float.h"
+#include "utility/log.h"
+#include "device/cpu/cpu_node.h"
+#include "device/cpu/cpu_graph.h"
+#include "device/cpu/cpu_module.h"
+
+#include <math.h>
+
+static int init_node(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    return 0;
+}
+
+static int release_node(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    return 0;
+}
+
+static int prerun(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    return 0;
+}
+
+/* y = (x - mean) / sqrt(var + eps) * gamma + beta, computed per normalization
+ * group in one pass: var = E[x^2] - E[x]^2, then the affine form y = x * a + b
+ * with a = 1/sqrt(var + eps) and b = -mean * a. */
+static int ref_layernorm_fp32(struct tensor* input_tensor, struct tensor* output_tensor,
+                              struct tensor* gamma_tensor, struct tensor* beta_tensor, float eps)
+{
+#if 1
+    // TIM-VX convention: normalize over the last dimension only
+    int norm_size = input_tensor->dims[input_tensor->dim_num - 1];
+    int count = 1;
+    for (int i = 0; i < input_tensor->dim_num - 1; i++)
+    {
+        count *= input_tensor->dims[i];
+    }
+#else
+    // PyTorch convention: the normalized extent is taken from gamma's shape
+    int norm_size = gamma_tensor->elem_num;
+    int count = input_tensor->elem_num / gamma_tensor->elem_num;
+#endif
+
+    const float* input_data = (const float*)input_tensor->data;
+    float* output_data = (float*)output_tensor->data;
+
+    const float* gamma_data = (const float*)gamma_tensor->data;
+    const float* beta_data = (const float*)beta_tensor->data;
+
+    for (int i = 0; i < count; i++)
+    {
+        float sum = 0.f;
+        float sqsum = 0.f;
+        for (int j = 0; j < norm_size; j++)
+        {
+            float x = input_data[i * norm_size + j];
+            sum += x;
+            sqsum += x * x;
+        }
+        float mean = sum / norm_size;
+        float var = sqsum / norm_size - mean * mean;
+        float a = 1.0f / sqrtf(var + eps);
+        float b = -mean * a;
+        for (int j = 0; j < norm_size; j++)
+        {
+            int offset = i * norm_size + j;
+            output_data[offset] = (input_data[offset] * a + b) * gamma_data[j] + beta_data[j];
+        }
+    }
+
+    return 0;
+}
+
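+/* The uint8 path reuses the fp32 math: dequantize the whole tensor with
+ * real = (q - zero_point) * scale, normalize in float, then requantize the
+ * result with round-to-nearest and clamp it to [0, 255]. */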
+static int ref_layernorm_uint8(struct tensor* input_tensor, struct tensor* output_tensor,
+                               struct tensor* gamma_tensor, struct tensor* beta_tensor, float eps)
+{
+#if 1
+    // TIM-VX convention: normalize over the last dimension only
+    int norm_size = input_tensor->dims[input_tensor->dim_num - 1];
+    int count = 1;
+    for (int i = 0; i < input_tensor->dim_num - 1; i++)
+    {
+        count *= input_tensor->dims[i];
+    }
+#else
+    // PyTorch convention: the normalized extent is taken from gamma's shape
+    int norm_size = gamma_tensor->elem_num;
+    int count = input_tensor->elem_num / gamma_tensor->elem_num;
+#endif
+
+    int total_size = input_tensor->elem_num;
+    float* input_data = (float*)sys_malloc(total_size * sizeof(float));
+    float* output_data = (float*)sys_malloc(total_size * sizeof(float));
+
+    // dequant
+    {
+        const uint8_t* input_uint8 = (const uint8_t*)input_tensor->data;
+        float input_scale = input_tensor->scale;
+        int input_zero = input_tensor->zero_point;
+
+        for (int i = 0; i < total_size; i++)
+            input_data[i] = ((float)input_uint8[i] - (float)input_zero) * input_scale;
+    }
+
+    const float* gamma_data = (const float*)gamma_tensor->data;
+    const float* beta_data = (const float*)beta_tensor->data;
+
+    for (int i = 0; i < count; i++)
+    {
+        float sum = 0.f;
+        float sqsum = 0.f;
+        for (int j = 0; j < norm_size; j++)
+        {
+            float x = input_data[i * norm_size + j];
+            sum += x;
+            sqsum += x * x;
+        }
+        float mean = sum / norm_size;
+        float var = sqsum / norm_size - mean * mean;
+        float a = 1.0f / sqrtf(var + eps);
+        float b = -mean * a;
+        for (int j = 0; j < norm_size; j++)
+        {
+            int offset = i * norm_size + j;
+            output_data[offset] = (input_data[offset] * a + b) * gamma_data[j] + beta_data[j];
+        }
+    }
+
+    // quant
+    {
+        uint8_t* output_uint8 = (uint8_t*)output_tensor->data;
+        float output_scale = output_tensor->scale;
+        int output_zero = output_tensor->zero_point;
+        for (int i = 0; i < total_size; i++)
+        {
+            int udata = (int)roundf(output_data[i] / output_scale + output_zero);
+            if (udata > 255)
+                udata = 255;
+            else if (udata < 0)
+                udata = 0;
+            output_uint8[i] = udata;
+        }
+    }
+
+    sys_free(input_data);
+    sys_free(output_data);
+    return 0;
+}
+
+static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
+{
+    struct node* node = exec_node->ir_node;
+    struct graph* graph = node->graph;
+
+    struct tensor* input_tensor = get_ir_graph_tensor(graph, node->input_tensors[0]);
+    struct tensor* gamma_tensor = get_ir_graph_tensor(graph, node->input_tensors[1]);
+    struct tensor* beta_tensor = get_ir_graph_tensor(graph, node->input_tensors[2]);
+
+    struct tensor* output_tensor = get_ir_graph_tensor(graph, node->output_tensors[0]);
+
+    struct layernorm_Param* param = (struct layernorm_Param*)node->op.param_mem;
+    float eps = param->eps;
+
+    int ret = -1;
+    if (input_tensor->data_type == TENGINE_DT_FP32)
+        ret = ref_layernorm_fp32(input_tensor, output_tensor, gamma_tensor, beta_tensor, eps);
+    else if (input_tensor->data_type == TENGINE_DT_UINT8)
+        ret = ref_layernorm_uint8(input_tensor, output_tensor, gamma_tensor, beta_tensor, eps);
+
+    return ret;
+}
+
+static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struct node* exec_node)
+{
+    return OPS_SCORE_BEST;
+}
+
+static struct node_ops hcl_node_ops = {.prerun = NULL,
+                                       .run = run,
+                                       .reshape = NULL,
+                                       .postrun = NULL,
+                                       .init_node = init_node,
+                                       .release_node = release_node,
+                                       .score = score};
+
+int register_layernorm_ref_op()
+{
+    return register_builtin_node_ops(OP_LAYERNORM, &hcl_node_ops);
+}
+
+int unregister_layernorm_ref_op()
+{
+    return unregister_builtin_node_ops(OP_LAYERNORM, &hcl_node_ops);
+}
diff --git a/source/device/tim-vx/op/timvx_gelu.cc b/source/device/tim-vx/op/timvx_gelu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..85a32e2f2535fbb59e3fccb0c889f91abfbb2b27
--- /dev/null
+++ b/source/device/tim-vx/op/timvx_gelu.cc
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, Open AI Lab
+ * Author: Shijie Chen
+ */
+
+#include "timvx_executor.hpp"
+
+extern "C"
+{
+#include "operator/op.h"
+}
+
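+// OP_GELU carries no parameters, so the node maps one-to-one onto the TIM-VX
+// built-in Gelu operation; only the input/output tensor bindings are wired up.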
+bool VXEngine::AddGeluNode(struct node* ir_node)
+{
+    struct graph* ir_graph = ir_node->graph;
+
+    struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
+    struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
+
+    auto gelu = graph->CreateOperation<tim::vx::ops::Gelu>();
+    (*gelu)
+        .BindInputs({ this->vx_tensor_map[input_tensor->index] })
+        .BindOutputs({ this->vx_tensor_map[output_tensor->index] });
+
+    return true;
+}
diff --git a/source/device/tim-vx/op/timvx_layernorm.cc b/source/device/tim-vx/op/timvx_layernorm.cc
new file mode 100644
index 0000000000000000000000000000000000000000..78dd73f79bc903d6a104bb759d2fd9a3d2df2144
--- /dev/null
+++ b/source/device/tim-vx/op/timvx_layernorm.cc
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, Open AI Lab
+ * Author: Shijie Chen
+ */
+
+#include "timvx_executor.hpp"
+
+extern "C"
+{
+#include "operator/op.h"
+#include "layernorm_param.h"
+}
+
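+// Tengine stores the inputs as (data, gamma, beta); the {0, 2, 1} remap below
+// hands them to TIM-VX as (data, beta, gamma), which appears to be the order
+// tim::vx::ops::LayerNormalization expects. The axis argument 0 refers to the
+// innermost dimension of TIM-VX's reversed layout, i.e. the same last
+// dimension the CPU reference kernel normalizes over.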
+bool VXEngine::AddLayerNormNode(struct node* ir_node)
+{
+    struct graph* ir_graph = ir_node->graph;
+
+    std::vector<std::shared_ptr<tim::vx::Tensor> > bn_in_tensor(ir_node->input_num);
+
+    int in_set[3] = {0, 2, 1};
+    for (int i = 0; i < ir_node->input_num; i++)
+    {
+        int idx = in_set[i];
+        struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[idx]);
+        bn_in_tensor[i] = this->vx_tensor_map[input_tensor->index];
+    }
+    struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
+
+    struct layernorm_Param* param = (struct layernorm_Param*)ir_node->op.param_mem;
+
+    auto layernorm = graph->CreateOperation<tim::vx::ops::LayerNormalization>(0, param->eps);
+    (*layernorm)
+        .BindInputs(bn_in_tensor)
+        .BindOutputs({ this->vx_tensor_map[output_tensor->index] });
+
+    return true;
+}
diff --git a/source/device/tim-vx/timvx_executor.cc b/source/device/tim-vx/timvx_executor.cc
index 7fc0c5da8fc5ff054fdcc9200225bb52f87b4a55..e7a9350c26fb328f2fcdfa8685616e2f90d0f121 100644
--- a/source/device/tim-vx/timvx_executor.cc
+++ b/source/device/tim-vx/timvx_executor.cc
@@ -365,6 +365,12 @@ int VXEngine::Build(struct subgraph* subgraph)
        case OP_L2NORMALIZATION:
            this->AddL2normalizationNode(ir_node);
            break;
+       case OP_GELU:
+           this->AddGeluNode(ir_node);
+           break;
+       case OP_LAYERNORM:
+           this->AddLayerNormNode(ir_node);
+           break;
        default:
            fprintf(stderr, "Tengine TIM-VX: Cannot support OP(%d).\n", ir_node->index);
            break;
diff --git a/source/device/tim-vx/timvx_executor.hpp b/source/device/tim-vx/timvx_executor.hpp
index b6a5222052f89dee15a2b1043a7d70488ed3b492..0be2ecf630487ae37fa725345dd3d63d45cfa7b7 100644
--- a/source/device/tim-vx/timvx_executor.hpp
+++ b/source/device/tim-vx/timvx_executor.hpp
@@ -79,6 +79,7 @@ extern "C" {
 #include "tim/vx/ops/transpose.h"
 #include "tim/vx/ops/spatial_transformer.h"
 #include "tim/vx/ops/l2normalization.h"
+#include "tim/vx/ops/layernormalization.h"
 
 #define SPEC_TYPE_CONV 1
 #define SPEC_TYPE_CONV_BIAS 2
@@ -145,6 +146,8 @@ private:
     bool AddUpsampleNode(struct node* ir_node);
     bool AddSpatialtransformerNode(struct node* ir_node);
     bool AddL2normalizationNode(struct node* ir_node);
+    bool AddGeluNode(struct node* ir_node);
+    bool AddLayerNormNode(struct node* ir_node);
 
 public:
     std::shared_ptr<tim::vx::Context> context;
diff --git a/source/device/tim-vx/timvx_limit.hpp b/source/device/tim-vx/timvx_limit.hpp
index c751393d33e9b2376b6d7546621d09eff9d026d5..5fbb0f564e1c9432fe6acbffaecd178c54288adb 100644
--- a/source/device/tim-vx/timvx_limit.hpp
+++ b/source/device/tim-vx/timvx_limit.hpp
@@ -131,5 +131,7 @@ const int timvx_supported_ops[] = {
     //    OP_WHERE,
     //    OP_SOFTPLUS,
     //    OP_RECIPROCAL,
+    OP_GELU,
+    OP_LAYERNORM,
     //    OP_BUILTIN_LAST
 };
diff --git a/source/operator/op.h b/source/operator/op.h
index d95828af68426ae023761a395a32d31550f1129e..dde05fbdf9a736b13ded38e559bee44fa53c869d 100644
--- a/source/operator/op.h
+++ b/source/operator/op.h
@@ -140,6 +140,7 @@ enum
     OP_SPATIALTRANSFORMER,
     OP_EXPAND,
     OP_GELU,
+    OP_LAYERNORM,
     OP_BUILTIN_LAST
 };
 
diff --git a/source/operator/op_name.h b/source/operator/op_name.h
index 29e238ead4726f280872c1c4d5856ecba2db20f3..83699e3bd01c6a012cba0546427b5cb2530cdb34 100644
--- a/source/operator/op_name.h
+++ b/source/operator/op_name.h
@@ -127,3 +127,4 @@
 #define OP_SPATIALTRANSFORMER_NAME "SpatialTransformer"
 #define OP_EXPAND_NAME "Expand"
 #define OP_GELU_NAME "Gelu"
+#define OP_LAYERNORM_NAME "LayerNorm"
\ No newline at end of file
diff --git a/source/operator/prototype/layernorm.c b/source/operator/prototype/layernorm.c
new file mode 100644
index 0000000000000000000000000000000000000000..32db15afb2a456ab8fc45163bd4dc7666f992e14
--- /dev/null
+++ b/source/operator/prototype/layernorm.c
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author: Shijie Chen
+ */
+
+#include "layernorm_param.h"
+
+#include "api/c_api.h"
+#include "graph/tensor.h"
+#include "graph/node.h"
+#include "graph/graph.h"
+#include "module/module.h"
+#include "utility/sys_port.h"
+#include "utility/log.h"
+
+#include <string.h>
+
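+/* LayerNorm is shape-preserving: the output tensor simply inherits the
+ * input's dims, whatever the normalized extent turns out to be at run time. */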
+static int infer_shape(struct node* node)
+{
+    struct graph* graph = node->graph;
+    struct tensor* input = get_ir_graph_tensor(graph, node->input_tensors[0]);
+    struct tensor* output = get_ir_graph_tensor(graph, node->output_tensors[0]);
+
+    set_ir_tensor_shape(output, input->dims, input->dim_num);
+
+    return 0;
+}
+
+static int init_op(struct op* op)
+{
+    struct layernorm_Param* param = (struct layernorm_Param*)sys_malloc(sizeof(struct layernorm_Param));
+
+    if (param == NULL)
+    {
+        return -1;
+    }
+
+    /* set the param default value */
+    memset(param, 0, sizeof(struct layernorm_Param));
+    op->param_mem = param;
+    op->param_size = sizeof(struct layernorm_Param);
+    op->same_shape = 0;
+    op->infer_shape = infer_shape;
+
+    return 0;
+}
+
+static void release_op(struct op* op)
+{
+    sys_free(op->param_mem);
+}
+
+int register_layernorm_op()
+{
+    struct method m;
+
+    m.version = 1;
+    m.init = init_op;
+    m.release = release_op;
+
+    return register_op(OP_LAYERNORM, OP_LAYERNORM_NAME, &m);
+}
+
+int unregister_layernorm_op()
+{
+    return unregister_op(OP_LAYERNORM, 1);
+}
diff --git a/source/operator/prototype/layernorm_param.h b/source/operator/prototype/layernorm_param.h
new file mode 100644
index 0000000000000000000000000000000000000000..116cc87426f5418580b0f08aed3de6b140aecbb0
--- /dev/null
+++ b/source/operator/prototype/layernorm_param.h
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author: Shijie Chen
+ */
+
+#ifndef __LAYERNORM_PARAM_H__
+#define __LAYERNORM_PARAM_H__
+
+struct layernorm_Param
+{
+    float eps; /* added to the variance for numerical stability */
+};
+
+#endif
diff --git a/source/serializer/tmfile/op/tm2_layernorm.c b/source/serializer/tmfile/op/tm2_layernorm.c
new file mode 100644
index 0000000000000000000000000000000000000000..4645e8405e187d59427faa8a2b39f9251f35086a
--- /dev/null
+++ b/source/serializer/tmfile/op/tm2_layernorm.c
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author: Shijie Chen
+ */
+
+#include "layernorm_param.h"
+
+#include "graph/tensor.h"
+#include "graph/node.h"
+#include "graph/graph.h"
+#include "graph/subgraph.h"
+#include "module/module.h"
+#include "serializer/serializer.h"
+#include "tmfile/tm2_serializer.h"
+#include "device/device.h"
+#include "utility/log.h"
+
+static int layernorm_op_map(int op)
+{
+    return OP_LAYERNORM;
+}
+
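+/* The tmfile side is minimal: tm_op->offset_t_param locates a
+ * TM2_LayerNormParam blob relative to the serialized model's base address,
+ * and eps is its only field. */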
+static int tm2_load_layernorm(struct graph* ir_graph, struct node* ir_node, const TM2_Node* tm_node,
+                              const TM2_Operator* tm_op)
+{
+    struct layernorm_Param* layernorm_param = (struct layernorm_Param*)ir_node->op.param_mem;
+    const struct tm2_priv* tm2_priv = (struct tm2_priv*)ir_graph->serializer_privacy;
+    const char* mem_base = tm2_priv->base;
+    const TM2_LayerNormParam* tm_param = (TM2_LayerNormParam*)(mem_base + tm_op->offset_t_param);
+
+    layernorm_param->eps = tm_param->eps;
+
+    return 0;
+}
+
+int register_tm2_layernorm_op()
+{
+    struct serializer* tm2_s = find_serializer_via_name("tengine");
+
+    if (tm2_s == NULL)
+    {
+        TLOG_ERR("tengine serializer has not been registered yet\n");
+        return -1;
+    }
+
+    tm2_s->register_op_loader(tm2_s, TM2_OPTYPE_LAYERNORM, 1, tm2_load_layernorm, layernorm_op_map, NULL);
+
+    return 0;
+}
+
+int unregister_tm2_layernorm_op()
+{
+    struct serializer* tm2_s = find_serializer_via_name("tengine");
+
+    tm2_s->unregister_op_loader(tm2_s, TM2_OPTYPE_LAYERNORM, 1, tm2_load_layernorm);
+
+    return 0;
+}
diff --git a/source/serializer/tmfile/tm2_format.h b/source/serializer/tmfile/tm2_format.h
index 7211c9df18a564904e67f602808de7e04ad916f4..5abbfc1535bf88da8b3ec447113f791754f9789a 100644
--- a/source/serializer/tmfile/tm2_format.h
+++ b/source/serializer/tmfile/tm2_format.h
@@ -151,6 +151,8 @@ typedef uint8_t tm_bool_t; /* bool is 1-byte unsigned integer */
 #define TM2_OPSTR_SPATIALTRANSFORMER "SpatialTransformer"
 #define TM2_OPSTR_EXPAND "Expand"
 #define TM2_OPSTR_GELU "Gelu"
+#define TM2_OPSTR_LAYERNORM "LayerNorm"
+
 /* Operator types */
 #define TM2_OPTYPE_ACCURACY 0 /* No Param */
 #define TM2_OPTYPE_BATCHNORMALIZATION 1 /* TM2_BatchNormParam */
@@ -258,7 +260,8 @@ typedef uint8_t tm_bool_t; /* bool is 1-byte unsigned integer */
 #define TM2_OPTYPE_RECIPROCAL 103
 #define TM2_OPTYPE_SPATIALTRANSFORMER 105
 #define TM2_OPTYPE_GELU 106
-#define TM2_OPTYPE_NUM 107
+#define TM2_OPTYPE_LAYERNORM 107
+#define TM2_OPTYPE_NUM 108
 
 /* -------------------------------- TM objects -------------------------------- */
 typedef struct
@@ -1006,6 +1009,11 @@ typedef struct
     int dim_num;
 } TM2_ExpandParam;
 
+typedef struct
+{
+    float eps;
+} TM2_LayerNormParam;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/tools/convert_tool/onnx/onnx2tengine.cpp b/tools/convert_tool/onnx/onnx2tengine.cpp
index f898dd085186c95f1d9cb97576bca0a78867bc18..29de06c4c5973f7fcdcfc7191a0bc1e1826153fb 100644
--- a/tools/convert_tool/onnx/onnx2tengine.cpp
+++ b/tools/convert_tool/onnx/onnx2tengine.cpp
@@ -2255,6 +2255,14 @@ static int load_gru(ir_graph_t* graph, ir_node_t* node, const onnx::NodeProto& onnx_node)
     return 0;
 }
 
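+// Exporters that emit a standalone "LayerNorm" node attach the stabilizer as
+// an "epsilon" attribute; fall back to the conventional 1e-5 when it is absent.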
+static int load_layer_norm(ir_graph_t* graph, ir_node_t* node, const onnx::NodeProto& onnx_node)
+{
+    struct layernorm_Param* layernorm_param = (struct layernorm_Param*)node->op.param_mem;
+    layernorm_param->eps = GetAttributeOrDefault<float>(onnx_node, "epsilon", 1e-5f);
+
+    return 0;
+}
+
 /*
  * OPERAOTR REGISTER FUNCTION DEFINE FOR ONNX SERIALIZER START
  */
@@ -2342,6 +2350,7 @@ void onnx_serializer::register_op_load()
     op_load_map["Unsqueeze"] = std::pair<int, op_load_t>(OP_UNSQUEEZE, load_unsqueeze);
     op_load_map["Where"] = std::pair<int, op_load_t>(OP_WHERE, load_no_param);
     op_load_map["Gelu"] = std::pair<int, op_load_t>(OP_GELU, load_no_param);
+    op_load_map["LayerNorm"] = std::pair<int, op_load_t>(OP_LAYERNORM, load_layer_norm);
 }
 /*
  * OPERATOR REGISTER FUNCTION DEFINE FOR ONNX SERIALIZER END
diff --git a/tools/save_graph/op_include.h b/tools/save_graph/op_include.h
index 1b1bb0d78d4b8de1da1a4704946af0309e962703..af0fabf49d0b28809a1d8a0e55f07399f0a3b178 100644
--- a/tools/save_graph/op_include.h
+++ b/tools/save_graph/op_include.h
@@ -103,6 +103,7 @@ extern "C" {
 #include "tile_param.h"
 #include "expand_param.h"
 #include "spatialtransformer_param.h"
+#include "layernorm_param.h"
 
 #ifdef __cplusplus
 }
diff --git a/tools/save_graph/tm2_op_save.cpp b/tools/save_graph/tm2_op_save.cpp
index 2c8a015a7529d9259ff0cc52bf7b78c86a13e163..c22604db7881fd8a64a32ce31867b96b7b793007 100644
--- a/tools/save_graph/tm2_op_save.cpp
+++ b/tools/save_graph/tm2_op_save.cpp
@@ -1422,6 +1422,23 @@ tm_uoffset_t SaveTmReciprocalOp(void* const start_ptr, tm_uoffset_t* cur_pos, ir_node_t* node)
     return WriteTmObject(start_ptr, cur_pos, &tm_op, sizeof(TM2_Operator));
 }
 
+tm_uoffset_t SaveTmGeluOp(void* const start_ptr, tm_uoffset_t* cur_pos, ir_node_t* node)
+{
+    TM2_Operator tm_op;
+    SetTmOperator(&tm_op, TM2_OPTYPE_GELU, TM2_NOT_SET);
+    return WriteTmObject(start_ptr, cur_pos, &tm_op, sizeof(TM2_Operator));
+}
+
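+// Unlike Gelu, LayerNorm carries a parameter block: serialize the
+// TM2_LayerNormParam first, then point the operator record at its offset.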
+tm_uoffset_t SaveTmLayerNormOp(void* const start_ptr, tm_uoffset_t* cur_pos, ir_node_t* node)
+{
+    struct layernorm_Param* p = (struct layernorm_Param*)node->op.param_mem;
+    TM2_LayerNormParam tm_param;
+    tm_param.eps = p->eps;
+    TM2_Operator tm_op;
+    SetTmOperator(&tm_op, TM2_OPTYPE_LAYERNORM, WriteTmObject(start_ptr, cur_pos, &tm_param, sizeof(TM2_LayerNormParam)));
+    return WriteTmObject(start_ptr, cur_pos, &tm_op, sizeof(TM2_Operator));
+}
+
 op_save_t SaveTmOpFunc(uint32_t op_type)
 {
     switch (op_type)
@@ -1606,6 +1623,10 @@ op_save_t SaveTmOpFunc(uint32_t op_type)
         return SaveTmMaximumOp;
     case OP_MINIMUM:
         return SaveTmMinimumOp;
+    case OP_GELU:
+        return SaveTmGeluOp;
+    case OP_LAYERNORM:
+        return SaveTmLayerNormOp;
     default:
         // fprintf(stderr, "Operator #%d not supported in tengine model yet\n", op_type);
         return nullptr;