Commit 99be9a49 authored by BUG1989

update timvx ops

Parent 6a4a0754
......@@ -54,14 +54,18 @@ Tengine Lite's core code consists of 4 modules:
### Quantization Tool
- [Prebuilt version](tools/quantize/README.md): a prebuilt model quantization tool for Ubuntu 18.04; uint8/int8 are already supported.
### Speed Benchmark
- [Benchmark](benchmark/): a tool for measuring the inference speed of common networks; contributions of updated results are welcome.
### NPU Plugin
- [TIM-VX](doc/npu_tim-vx_user_manual.md): user guide for the VeriSilicon NPU (see the minimal sketch below).
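A minimal sketch (illustrative, not taken from this commit or the user manual) of offloading a quantized uint8 tmfile to the VeriSilicon NPU via the TIM-VX backend, assuming the standard Tengine Lite C API; input preparation and most error handling are trimmed:

```cpp
/* Hypothetical minimal example: run a uint8 tmfile on the TIM-VX backend.
 * API names follow the public Tengine Lite C API; adapt paths as needed. */
#include <cstdio>
#include "tengine/c_api.h"

int main(int argc, char* argv[])
{
    if (argc < 2)
    {
        fprintf(stderr, "usage: %s model_uint8.tmfile\n", argv[0]);
        return -1;
    }

    init_tengine();

    /* bind a context to the TIM-VX NPU device */
    context_t timvx_context = create_context("timvx", 1);
    if (add_context_device(timvx_context, "TIMVX") < 0)
    {
        fprintf(stderr, "add TIMVX device failed\n");
        return -1;
    }

    graph_t graph = create_graph(timvx_context, "tengine", argv[1]);
    if (graph == nullptr)
    {
        fprintf(stderr, "create graph failed\n");
        return -1;
    }

    /* ... set the input tensor shape and uint8 buffer here ... */

    prerun_graph(graph);   /* supported ops are partitioned onto the NPU subgraph */
    run_graph(graph, 1);

    postrun_graph(graph);
    destroy_graph(graph);
    destroy_context(timvx_context);
    release_tengine();
    return 0;
}
```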
### AutoKernel Plugin
- [AutoKernel](https://github.com/OAID/AutoKernel.git) is an easy-to-use, low-barrier tool for automatic operator optimization; the AutoKernel Plugin deploys the auto-optimized operators into Tengine with one click.
## Roadmap
......
......@@ -134,3 +134,10 @@ Repeat 10 times, thread 1, avg time 2.95 ms, max_time 3.42 ms, min_time 2.76 ms
32.045452, 277
30.780502, 282
```
### 4. Support list
| Vendor | Devices |
| ------- | ------------ |
| Amlogic | A311D |
| NXP | i.MX 8M Plus |
| X86-64 | Simulator |
......@@ -7,8 +7,8 @@
- [ ] fix the Float32 bugs of Vulkan
- [ ] support the model type of PaddlePaddle
- [x] support the model type of OneFlow
- [x] open-source the plugin implementation of NPU (A311D)
- [x] open-source the plugin implementation of NPU (VeriSilicon NPU IP)
- [x] open-source the plugin implementation of CUDA
- [x] open-source the plugin implementation of TensorRT
- [ ] open-source the plugin implementation of NNIE
- [x] open-source the plugin implementation of NNIE
- [x] add more test cases
......@@ -137,7 +137,7 @@ static void nms(std::vector<FaceInfo>& input, std::vector<FaceInfo>& output, int
break;
}
default: {
printf("wrong type of nms.");
fprintf(stderr, "wrong type of nms.");
exit(-1);
}
}
......@@ -219,12 +219,12 @@ static void post_process_ultraface(const char* image_file, float *boxs_data, flo
std::vector<FaceInfo> face_list;
nms(bbox_collection, face_list);
printf("detected face num: %d\n", face_list.size());
fprintf(stderr, "detected face num: %d\n", face_list.size());
for (int i = 0; i < face_list.size(); i++)
{
FaceInfo box = face_list[i];
draw_box(im, box.x1, box.y1, box.x2, box.y2, 4, 255, 0, 0);
printf("BOX %.2f:(%.2f, %.2f),(%.2f, %.2f)\n", box.score, box.x1, box.y1, box.x2, box.y2);
fprintf(stderr, "BOX %.2f:(%.2f, %.2f),(%.2f, %.2f)\n", box.score, box.x1, box.y1, box.x2, box.y2);
}
save_image(im, "tengine_example_out");
......
......@@ -44,13 +44,28 @@ bool VXEngine::AddConcatNode(struct ir_node* ir_node)
}
struct concat_param* param = (struct concat_param*)ir_node->op.param_mem;
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
auto concat = graph->CreateOperation<tim::vx::ops::Concat>(output_tensor->dim_num - param->axis - 1, ir_node->input_num);
(*concat)
.BindInputs(concat_in_tensor)
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
if (ir_node->input_num == 1)
{
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
std::vector<uint32_t> perm;
for (int i = output_tensor->dim_num - 1; i >= 0; i--)
{
perm.push_back(output_tensor->dims[i]);
}
auto reshape = graph->CreateOperation<tim::vx::ops::Reshape>(perm);
(*reshape)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
}
else
{
auto concat = graph->CreateOperation<tim::vx::ops::Concat>(output_tensor->dim_num - param->axis - 1, ir_node->input_num);
(*concat)
.BindInputs(concat_in_tensor)
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
}
return true;
}
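For reference (illustrative, not part of this commit): the concat and reshape paths above mirror every axis and shape because Tengine IR lists dimensions from the outermost (N, C, H, W) while TIM-VX expects them starting from the innermost; the hypothetical helpers below simply restate that convention.
// Hypothetical helpers, not in the commit: the axis/shape mirroring that
// AddConcatNode and AddReshapeNode perform inline.
static uint32_t to_vx_axis(uint32_t dim_num, int ir_axis)
{
    // e.g. a 4-D NCHW tensor with ir_axis = 1 (channels) maps to VX axis 4 - 1 - 1 = 2
    return dim_num - (uint32_t)ir_axis - 1;
}

static std::vector<uint32_t> to_vx_shape(const struct ir_tensor* t)
{
    std::vector<uint32_t> shape;
    for (int i = t->dim_num - 1; i >= 0; i--)
        shape.push_back(t->dims[i]);    // reversed: innermost dimension first
    return shape;
}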
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
#include "depthtospace_param.h"
}
bool VXEngine::AddDepthToSpaceNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_DEPTHTOSPACE.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
struct depthtospace_param* param = (struct depthtospace_param*)ir_node->op.param_mem;
auto depth2space = graph->CreateOperation<tim::vx::ops::DepthToSpace>(param->block_size);
(*depth2space)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
......@@ -31,7 +31,7 @@ extern "C"
}
bool VXEngine::AddEltwisSumNode(struct ir_node* ir_node)
bool VXEngine::AddEltwiseNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_RELU.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
......@@ -66,6 +66,14 @@ bool VXEngine::AddEltwisSumNode(struct ir_node* ir_node)
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
break;
}
case ELT_SUB:
{
auto eltsub = graph->CreateOperation<tim::vx::ops::Sub>();
(*eltsub)
.BindInputs(add_in_tensor)
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
break;
}
default:
break;
}
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
}
bool VXEngine::AddEluNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_ELU.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
auto elu = graph->CreateOperation<tim::vx::ops::Elu>();
(*elu)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
#include "gather_param.h"
}
bool VXEngine::AddGatherNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_GATHER.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
struct gather_param* param = (struct gather_param*)ir_node->op.param_mem;
auto gather = graph->CreateOperation<tim::vx::ops::Gather>(param->axis);
(*gather)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
}
bool VXEngine::AddHardSwishNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_HARDSWISH.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
auto hardswish = graph->CreateOperation<tim::vx::ops::HardSwish>();
(*hardswish)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
#include "interp_param.h"
}
bool VXEngine::AddInterpNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_INTERP.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
struct interp_param* param = (struct interp_param*)ir_node->op.param_mem;
tim::vx::ResizeType resize_type;
if (param->resize_type == 1)
    resize_type = tim::vx::ResizeType::NEAREST_NEIGHBOR;
else if (param->resize_type == 2)
    resize_type = tim::vx::ResizeType::BILINEAR;
else
{
    fprintf(stderr, "Unsupported resize type(%d).\n", param->resize_type);
    return false;
}
auto resize = graph->CreateOperation<tim::vx::ops::Resize>(resize_type, 0.0f, false, false, param->output_height, param->output_width);
(*resize)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
}
bool VXEngine::AddPReluNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_PRELU.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
struct relu_param* param = (struct relu_param*)ir_node->op.param_mem;
// TODO Fix
// if (param->negative_slope > 0.000001)
// {
// auto leaky_relu = this->graph->CreateOperation<tim::vx::ops::LeakyRelu>(0.1);
// (*leaky_relu).BindInput( this->vx_tensor_map[input_tensor->idx] )
// .BindOutput({ this->vx_tensor_map[output_tensor->idx] });
// }
// else
// {
// auto relu = this->graph->CreateOperation<tim::vx::ops::Relu>();
// (*relu).BindInput( this->vx_tensor_map[input_tensor->idx] )
// .BindOutput({ this->vx_tensor_map[output_tensor->idx] });
// }
return true;
}
......@@ -27,9 +27,9 @@
extern "C"
{
#include "tengine_op.h"
#include "relu_param.h"
}
bool VXEngine::AddReluNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_RELU.\n", ir_node->idx);
......@@ -38,9 +38,20 @@ bool VXEngine::AddReluNode(struct ir_node* ir_node)
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
auto relu = this->graph->CreateOperation<tim::vx::ops::Relu>();
(*relu).BindInput( this->vx_tensor_map[input_tensor->idx] )
.BindOutput({ this->vx_tensor_map[output_tensor->idx] });
struct relu_param* param = (struct relu_param*)ir_node->op.param_mem;
if (param->negative_slope > 0.000001)
{
auto leaky_relu = this->graph->CreateOperation<tim::vx::ops::LeakyRelu>(param->negative_slope);
(*leaky_relu).BindInput( this->vx_tensor_map[input_tensor->idx] )
.BindOutput({ this->vx_tensor_map[output_tensor->idx] });
}
else
{
auto relu = this->graph->CreateOperation<tim::vx::ops::Relu>();
(*relu).BindInput( this->vx_tensor_map[input_tensor->idx] )
.BindOutput({ this->vx_tensor_map[output_tensor->idx] });
}
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
}
bool VXEngine::AddRelu1Node(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_RELU1.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
auto relu1 = graph->CreateOperation<tim::vx::ops::Relu1>();
(*relu1)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
}
bool VXEngine::AddReshapeNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_RESHAPE.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
std::vector<uint32_t> perm;
for (int i = output_tensor->dim_num - 1; i >= 0; i--)
{
perm.push_back(output_tensor->dims[i]);
}
auto reshape = graph->CreateOperation<tim::vx::ops::Reshape>(perm);
(*reshape)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
}
bool VXEngine::AddSigmoidNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_SIGMOID.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
auto sigmoid = graph->CreateOperation<tim::vx::ops::Sigmoid>();
(*sigmoid)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
#include "slice_param.h"
}
bool VXEngine::AddSliceNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_SLICE.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
struct slice_param* param = (struct slice_param*)ir_node->op.param_mem;
uint32_t axis = output_tensor->dim_num - param->axis;
std::vector<int32_t> start;
for (int i = output_tensor->dim_num - 1; i >= 0; i--)
{
if (axis == i)
start.push_back(param->begin);
else
start.push_back(0);
}
std::vector<int32_t> length;
for (int i = output_tensor->dim_num - 1; i >= 0; i--)
{
if (axis == i)
length.push_back(param->end - param->begin);
else
length.push_back(-1);
}
auto slice = this->graph->CreateOperation<tim::vx::ops::Slice>(output_tensor->dim_num, start, length);
(*slice).BindInput( this->vx_tensor_map[input_tensor->idx] )
.BindOutput({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
#include "softmax_param.h"
}
bool VXEngine::AddSoftmaxNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_SOFTMAX.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
struct softmax_param* param = (struct softmax_param*)ir_node->op.param_mem;
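// Note (added for clarity): "3 - param->axis" assumes a 4-D tensor, i.e.
// dim_num - 1 - axis, mirroring the NCHW axis into TIM-VX's reversed
// dimension order (same convention as AddConcatNode above).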
auto softmax = graph->CreateOperation<tim::vx::ops::Softmax>(1.0, 3 - param->axis);
(*softmax)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
#include "spacetodepth_param.h"
}
bool VXEngine::AddSpaceToDepthNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_SPACETODEPTH.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
struct spacetodepth_param* param = (struct spacetodepth_param*)ir_node->op.param_mem;
std::vector<int> block_size;
block_size.push_back(param->block_size);
auto space2depth = graph->CreateOperation<tim::vx::ops::SpaceToDepth>(block_size);
(*space2depth)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
}
bool VXEngine::AddTanhNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_TANH.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
auto tanh = graph->CreateOperation<tim::vx::ops::Tanh>();
(*tanh)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: hhchen@openailab.com
*/
#include "timvx_executor.hpp"
extern "C"
{
#include "tengine_op.h"
}
bool VXEngine::AddUpsampleNode(struct ir_node* ir_node)
{
TLOG_INFO("Tengine TIM-VX: Support OP(%d) OP_UPSAMPLE.\n", ir_node->idx);
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
struct ir_tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
tim::vx::ResizeType resize_type = tim::vx::ResizeType::NEAREST_NEIGHBOR;
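// Note (added for clarity): with Tengine's NCHW layout, dims[2]/dims[3]
// below are the output height/width used as the resize target size.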
auto resize = graph->CreateOperation<tim::vx::ops::Resize>(resize_type, 0.0f, false, false, output_tensor->dims[2], output_tensor->dims[3]);
(*resize)
.BindInputs({ this->vx_tensor_map[input_tensor->idx] })
.BindOutputs({ this->vx_tensor_map[output_tensor->idx] });
return true;
}
......@@ -159,11 +159,17 @@ int VXEngine::Build(struct subgraph* subgraph)
case OP_CONV:
this->AddConvolutionNode(ir_node);
break;
case OP_DEPTHTOSPACE:
this->AddDepthToSpaceNode(ir_node);
break;
case OP_DROPOUT:
this->AddDropoutNode(ir_node);
break;
case OP_ELTWISE:
this->AddEltwisSumNode(ir_node);
this->AddEltwiseNode(ir_node);
break;
case OP_ELU:
this->AddEluNode(ir_node);
break;
case OP_FC:
this->AddFullyConnectionNode(ir_node);
......@@ -171,23 +177,51 @@ int VXEngine::Build(struct subgraph* subgraph)
case OP_FLATTEN:
this->AddFlattenNode(ir_node);
break;
case OP_GATHER:
this->AddGatherNode(ir_node);
break;
case OP_HARDSWISH:
this->AddHardSwishNode(ir_node);
break;
case OP_INTERP:
this->AddInterpNode(ir_node);
break;
// case OP_PERMUTE:
// this->AddPermuteNode(ir_graph, ir_node);
// break;
case OP_POOL:
this->AddPoolingNode(ir_node);
break;
case OP_PRELU:
this->AddPReluNode(ir_node);
break;
case OP_RELU:
this->AddReluNode(ir_node);
break;
// case OP_RESHAPE:
// this->AddReshapeNode(ir_graph, ir_node);
// break;
// case OP_SLICE:
// this->AddSliceNode(ir_graph, ir_node);
// break;
// case OP_SOFTMAX:
// this->AddSoftmaxNode(ir_graph, ir_node);
case OP_RELU1:
this->AddRelu1Node(ir_node);
break;
case OP_RESHAPE:
this->AddReshapeNode(ir_node);
break;
case OP_SIGMOID:
this->AddSigmoidNode(ir_node);
break;
case OP_SLICE:
this->AddSliceNode(ir_node);
break;
case OP_SOFTMAX:
this->AddSoftmaxNode(ir_node);
break;
case OP_SPACETODEPTH:
this->AddSpaceToDepthNode(ir_node);
break;
case OP_TANH:
this->AddTanhNode(ir_node);
break;
case OP_UPSAMPLE:
this->AddUpsampleNode(ir_node);
break;
default:
fprintf(stderr, "Tengine TIM-VX: Cannot support OP(%d).\n", ir_node->idx);
break;
......
......@@ -43,13 +43,19 @@ extern "C"
#include "tim/vx/operation.h"
#include "tim/vx/ops/activations.h"
#include "tim/vx/ops/depth2space.h"
#include "tim/vx/ops/concat.h"
#include "tim/vx/ops/conv2d.h"
#include "tim/vx/ops/elementwise.h"
#include "tim/vx/ops/fullyconnected.h"
#include "tim/vx/ops/gather.h"
#include "tim/vx/ops/pool2d.h"
#include "tim/vx/ops/reshape.h"
#include "tim/vx/ops/resize.h"
#include "tim/vx/ops/slice.h"
#include "tim/vx/ops/softmax.h"
#include "tim/vx/ops/space2depth.h"
#include "tim/vx/tensor.h"
#include "convolution_param.h"
......@@ -77,12 +83,27 @@ private:
bool AddClipNode(struct ir_node* ir_node);
bool AddConcatNode(struct ir_node* ir_node);
bool AddConvolutionNode(struct ir_node* ir_node);
bool AddDepthToSpaceNode(struct ir_node* ir_node);
bool AddDropoutNode(struct ir_node* ir_node);
bool AddEltwisSumNode(struct ir_node* ir_node);
bool AddEltwiseNode(struct ir_node* ir_node);
bool AddEluNode(struct ir_node* ir_node);
bool AddFlattenNode(struct ir_node* ir_node);
bool AddFullyConnectionNode(struct ir_node* node);
bool AddGatherNode(struct ir_node* node);
bool AddHardSwishNode(struct ir_node* node);
bool AddInterpNode(struct ir_node* ir_node);
bool AddPoolingNode(struct ir_node* ir_node);
bool AddPReluNode(struct ir_node* ir_node);
bool AddReluNode(struct ir_node* ir_node);
bool AddRelu1Node(struct ir_node* ir_node);
bool AddReshapeNode(struct ir_node* ir_node);
bool AddSigmoidNode(struct ir_node* ir_node);
bool AddSliceNode(struct ir_node* ir_node);
bool AddSoftmaxNode(struct ir_node* ir_node);
bool AddSpaceToDepthNode(struct ir_node* ir_node);
bool AddTanhNode(struct ir_node* ir_node);
bool AddUpsampleNode(struct ir_node* ir_node);
......
......@@ -32,19 +32,103 @@ extern "C"
const int timvx_supported_ops[] = {
OP_CLIP,
OP_CONCAT,
OP_CONST,
OP_CONV,
OP_DROPOUT,
OP_ELTWISE,
OP_FC,
OP_FLATTEN,
OP_INPUT,
// OP_PERMUTE,
OP_POOL,
OP_RELU,
OP_RESHAPE,
OP_SLICE,
OP_SOFTMAX
// OP_GENERIC,
// OP_ABSVAL,
// OP_ADD_N,
// OP_ARGMAX,
// OP_ARGMIN,
// OP_BATCHNORM,
// OP_BATCHTOSPACEND,
// OP_BIAS,
// OP_BROADMUL,
// OP_CAST,
// OP_CEIL,
OP_CLIP,
// OP_COMPARISON,
OP_CONCAT,
OP_CONST,
OP_CONV,
// OP_CROP,
// OP_DECONV,
OP_DEPTHTOSPACE,
// OP_DETECTION_OUTPUT,
// OP_DETECTION_POSTPROCESS,
OP_DROPOUT,
OP_ELTWISE,
OP_ELU,
// OP_EMBEDDING,
// OP_EXPANDDIMS,
OP_FC,
OP_FLATTEN,
// OP_GATHER,
// OP_GEMM,
// OP_GRU,
// OP_HARDSIGMOID,
OP_HARDSWISH,
OP_INPUT,
// OP_INSTANCENORM,
OP_INTERP,
// OP_LOGICAL,
// OP_LOGISTIC,
// OP_LRN,
// OP_LSTM,
// OP_MATMUL,
// OP_MAXIMUM,
// OP_MEAN,
// OP_MINIMUM,
// OP_MVN,
// OP_NOOP,
// OP_NORMALIZE,
// OP_PAD,
//// OP_PERMUTE,
OP_POOL,
// OP_PRELU,
// OP_PRIORBOX,
// OP_PSROIPOOLING,
// OP_REDUCEL2,
// OP_REDUCTION,
// OP_REGION,
OP_RELU,
OP_RELU1,
OP_RELU6,
// OP_REORG,
OP_RESHAPE,
// OP_RESIZE,
// OP_REVERSE,
// OP_RNN,
// OP_ROIALIGN,
// OP_ROIPOOLING,
// OP_ROUND,
// OP_RPN,
// OP_SCALE,
// OP_SELU,
// OP_SHUFFLECHANNEL,
OP_SIGMOID,
OP_SLICE,
OP_SOFTMAX,
// OP_SPACETOBATCHND,
OP_SPACETODEPTH,
// OP_SPARSETODENSE,
// OP_SPLIT,
// OP_SQUAREDDIFFERENCE,
// OP_SQUEEZE,
// OP_STRIDED_SLICE,
// OP_SWAP_AXIS,
OP_TANH,
// OP_THRESHOLD,
// OP_TOPKV2,
// OP_TRANSPOSE,
// OP_UNARY,
// OP_UNSQUEEZE,
OP_UPSAMPLE,
// OP_ZEROSLIKE,
// OP_MISH,
// OP_LOGSOFTMAX,
// OP_L2NORMALIZATION,
// OP_L2POOL,
// OP_TILE,
// OP_SHAPE,
// OP_SCATTER,
// OP_WHERE,
// OP_BUILTIN_LAST
};
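Taken together, the files in this commit repeat one wiring pattern for every newly supported operator; the outline below is an editor's summary (file names are inferred from the content, since the diff viewer omits them), not additional code from the commit.
// Per-op wiring repeated by this commit, using OP_TANH as the example:
//
// 1. timvx_device.cc   : add OP_TANH to timvx_supported_ops[] so the graph
//                        partitioner may place the node on the NPU subgraph.
// 2. timvx_executor.hpp: declare the builder,
//                            bool AddTanhNode(struct ir_node* ir_node);
//                        and include the TIM-VX op header it needs.
// 3. timvx_executor.cc : dispatch from VXEngine::Build,
//                            case OP_TANH: this->AddTanhNode(ir_node); break;
// 4. op/timvx_tanh.cc  : implement AddTanhNode, creating
//                        tim::vx::ops::Tanh and binding the input/output
//                        tensors from vx_tensor_map, as shown earlier.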