未验证 提交 b18d6878 编写于 作者: K kalcohol 提交者: GitHub

add some trt layer (#680)

* add abs addn hardswish mish transpose layer impl

* fix print issues

* add layers

* use defined name

* change dim limit to 8

* disable log level change for init

* set default log level
上级 6b98f365
......@@ -289,7 +289,7 @@ int init_tengine(void)
return 0;
}
set_log_level(LOG_ERR);
//set_log_level(LOG_ERR);
int ret = register_all_op_prototype();
if (0 != ret)
......
......@@ -55,7 +55,7 @@
extern "C" {
#endif
#define MAX_SHAPE_DIM_NUM 4
#define MAX_SHAPE_DIM_NUM 8
/* the data type of the tensor */
#define TENGINE_DT_FP32 0
......
......@@ -49,7 +49,7 @@
#define TE_NODE_TYPE_INPUT 2
#define TE_NODE_TYPE_OUTPUT 4
#define TE_DEFAULT_LOG_LEVEL LOG_DEBUG
#define TE_DEFAULT_LOG_LEVEL LOG_ERR
#define TE_MAX_LOG_LENGTH 256
#cmakedefine TENGINE_HAS_LIB_POSIX_THREAD
......
......@@ -38,6 +38,7 @@
#define CPU_DEVICE_NAME "CPU"
#define TENGINE_DUMP_DIR "TG_DEBUG_DUMP_DIR"
#define TENGINE_DUMP_LAYER "TG_DEBUG_DATA"
#define TENGINE_PRINT_LAYER_COST "TG_DEBUG_TIME"
#define TENGINE_FORCE_USE_REF_OP "TG_DEBUG_REF"
......
......@@ -190,7 +190,7 @@ static int run(struct device* dev, struct subgraph* subgraph)
}
}
#endif
const char* env = getenv("TG_DEBUG_DATA");
const char* env = getenv(TENGINE_DUMP_LAYER);
if (env && env[0] == '1')
{
struct graph* ir_graph = node->ir_node->graph;
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: lswang@openailab.com
*/
#include "../trt_executor.hpp"
bool TensorRTEngine::AddAbsVal(struct graph* ir_graph, struct node* node)
{
struct tensor* input = get_ir_graph_tensor(ir_graph, node->input_tensors[0]);
struct tensor* output = get_ir_graph_tensor(ir_graph, node->output_tensors[0]);
if (nullptr == input || nullptr == output)
{
fprintf(stderr, "Tengine: Get input & output for AbsVal(id: %d, name: %s) layer failed.\n", node->index, node->name);
return false;
}
if (!check_if_input_in_map(input->index, this->tensor_swap_map))
{
fprintf(stderr, "Tengine: Query input for AbsVal(id: %d, name: %s) layer failed.\n", node->index, node->name);
return false;
}
nvinfer1::ITensor* input_tensor = tensor_real_map[tensor_swap_map[input->index]];
nvinfer1::IUnaryLayer* layer = this->network->addUnary(*input_tensor, nvinfer1::UnaryOperation::kABS);
layer->setName(node->name);
this->layer_map[node->index] = layer;
auto layer_output = layer->getOutput(0);
this->SetRange(output, layer_output);
this->tensor_real_map[output->index] = layer_output;
this->tensor_swap_map[output->index] = output->index;
return true;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: lswang@openailab.com
*/
#include "../trt_executor.hpp"
bool TensorRTEngine::AddAddN(struct graph* ir_graph, struct node* node)
{
struct tensor* output = get_ir_graph_tensor(ir_graph, node->output_tensors[0]);
if (nullptr == output)
{
fprintf(stderr, "Tengine: Get output for AddN(id: %d, name: %s) layer failed.\n", node->index, node->name);
return false;
}
std::vector<nvinfer1::ITensor*> input_tensors(node->input_num);
for (int i = 0; i < node->input_num; i++)
{
struct tensor* input = get_ir_graph_tensor(ir_graph, node->input_tensors[i]);
if (nullptr == input)
{
fprintf(stderr, "Tengine: Get input(%d) for AddN(id: %d, name: %s) layer failed.\n", i, node->index, node->name);
return false;
}
if (!check_if_input_in_map(input->index, this->tensor_swap_map))
{
fprintf(stderr, "Tengine: Query input for AddN(id: %d, name: %s) layer failed.\n", node->index, node->name);
return false;
}
nvinfer1::ITensor* input_tensor = tensor_real_map[tensor_swap_map[input->index]];
input_tensors[i] = input_tensor;
}
if (input_tensors.size() == 1)
{
fprintf(stderr, "Tengine: Only 1 input for AddN(id: %d, name: %s) is not allowed.\n", node->index, node->name);
return false;
}
int count = 0;
nvinfer1::ITensor* temp_result = nullptr;
nvinfer1::IElementWiseLayer* layer = nullptr;
while (!input_tensors.empty())
{
if (nullptr == temp_result)
{
auto input_a = input_tensors[input_tensors.size() - 1];
auto input_b = input_tensors[input_tensors.size() - 2];
layer = this->network->addElementWise(*input_a, *input_b, nvinfer1::ElementWiseOperation::kSUM);
std::string layer_name = std::string(node->name) + "_" + std::to_string(count);
layer->setName(layer_name.c_str());
temp_result = layer->getOutput(0);
input_tensors.pop_back();
input_tensors.pop_back();
}
else
{
auto input = input_tensors[input_tensors.size() - 1];
layer = this->network->addElementWise(*input, *temp_result, nvinfer1::ElementWiseOperation::kSUM);
std::string layer_name = std::string(node->name) + "_" + std::to_string(count);
layer->setName(layer_name.c_str());
temp_result = layer->getOutput(0);
input_tensors.pop_back();
}
count++;
}
layer->setName(node->name);
this->layer_map[node->index] = layer;
this->SetRange(output, temp_result);
this->tensor_real_map[output->index] = temp_result;
this->tensor_swap_map[output->index] = output->index;
return true;
}
......@@ -41,13 +41,13 @@ bool TensorRTEngine::AddConvolutionNode(struct graph* ir_graph, struct node *nod
if (nullptr == conv_data || nullptr == conv_weight)
{
fprintf(stderr, "Tengine: Get input data & weight for conv(id: %d, name: %s).\n", conv_weight->index, conv_weight->name);
fprintf(stderr, "Tengine: Get input data & weight for Convolution(id: %d, name: %s).\n", conv_weight->index, conv_weight->name);
return false;
}
if (!check_if_input_in_map(conv_data->index, this->tensor_swap_map))
{
fprintf(stderr, "Tengine: Query input for Concat(id: %d, name: %s) layer failed.\n", node->index, node->name);
fprintf(stderr, "Tengine: Query input for Convolution(id: %d, name: %s) layer failed.\n", node->index, node->name);
return false;
}
......
......@@ -40,13 +40,13 @@ bool TensorRTEngine::AddDeConvolutionNode(struct graph* ir_graph, struct node *n
if (nullptr == deconv_data || nullptr == deconv_weight)
{
fprintf(stderr, "Tengine: Get input data & weight for deconv(id: %d, name: %s).\n", deconv_weight->index, deconv_weight->name);
fprintf(stderr, "Tengine: Get input data & weight for DeConvolution(id: %d, name: %s).\n", deconv_weight->index, deconv_weight->name);
return false;
}
if (!check_if_input_in_map(deconv_data->index, this->tensor_swap_map))
{
fprintf(stderr, "Tengine: Query input for Deconv(id: %d, name: %s) layer failed.\n", node->index, node->name);
fprintf(stderr, "Tengine: Query input for DeConvolution(id: %d, name: %s) layer failed.\n", node->index, node->name);
return false;
}
......
......@@ -41,7 +41,7 @@ bool TensorRTEngine::AddDropoutNode(struct graph* ir_graph, struct node* node)
nvinfer1::IShuffleLayer* layer = this->network->addShuffle(*drop_input_tensor);
if (nullptr == layer)
{
fprintf(stderr, "Tengine: Add Reshape(id: %d, name: %s) layer failed.\n", node->index, node->name);
fprintf(stderr, "Tengine: Add Dropout(id: %d, name: %s) layer failed.\n", node->index, node->name);
return false;
}
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: lswang@openailab.com
*/
#include "../trt_executor.hpp"
/*
 * Map a HardSwish node onto TensorRT layers.
 *
 *   HardSwish(x) = x * ReLU6(x + 3) / 6
 *
 * Built as: Scale(+3) -> Clip[0,6] -> Scale(*1/6) -> ElementWise kPROD with x.
 * Returns true on success, false (with a message on stderr) otherwise.
 */
bool TensorRTEngine::AddHardSwishNode(struct graph* ir_graph, struct node* node)
{
    struct tensor* input = get_ir_graph_tensor(ir_graph, node->input_tensors[0]);
    struct tensor* output = get_ir_graph_tensor(ir_graph, node->output_tensors[0]);
    if (nullptr == input || nullptr == output)
    {
        fprintf(stderr, "Tengine: Get input & output for HardSwish(id: %d, name: %s) layer failed.\n", node->index, node->name);
        return false;
    }
    if (!check_if_input_in_map(input->index, this->tensor_swap_map))
    {
        fprintf(stderr, "Tengine: Query input for HardSwish(id: %d, name: %s) layer failed.\n", node->index, node->name);
        return false;
    }

    // Scale-layer constants. They must be kFLOAT for an FP32 network (the
    // previous uint8_t/kINT8 weights were misread by TensorRT), and TensorRT
    // keeps raw pointers into Weights until the engine is built, so the
    // backing storage must outlive this function -> static.
    static const float add3_scale = 1.f, add3_shift = 3.f, add3_power = 1.f;
    static const float div6_scale = 1.f / 6.f, div6_shift = 0.f, div6_power = 1.f;

    nvinfer1::ITensor* trt_tensor = tensor_real_map[tensor_swap_map[input->index]];

    nvinfer1::Weights add3_scale_param{nvinfer1::DataType::kFLOAT, &add3_scale, 1};
    nvinfer1::Weights add3_shift_param{nvinfer1::DataType::kFLOAT, &add3_shift, 1};
    nvinfer1::Weights add3_power_param{nvinfer1::DataType::kFLOAT, &add3_power, 1};
    nvinfer1::Weights div6_scale_param{nvinfer1::DataType::kFLOAT, &div6_scale, 1};
    nvinfer1::Weights div6_shift_param{nvinfer1::DataType::kFLOAT, &div6_shift, 1};
    nvinfer1::Weights div6_power_param{nvinfer1::DataType::kFLOAT, &div6_power, 1};

    // x + 3  (addScale computes (scale * x + shift) ^ power)
    nvinfer1::IScaleLayer* add3_layer = this->network->addScale(*trt_tensor, nvinfer1::ScaleMode::kUNIFORM, add3_shift_param, add3_scale_param, add3_power_param);
    std::string add3_layer_name = std::string(node->name) + "_add3";
    add3_layer->setName(add3_layer_name.c_str());
    auto add3_output = add3_layer->getOutput(0);

    // ReLU6 = clamp to [0, 6]. kRELU ignores setAlpha/setBeta, so kCLIP is
    // required to get the upper bound (previously this was plain ReLU).
    nvinfer1::IActivationLayer* relu6_layer = this->network->addActivation(*add3_output, nvinfer1::ActivationType::kCLIP);
    relu6_layer->setAlpha(0);
    relu6_layer->setBeta(6);
    std::string relu6_layer_name = std::string(node->name) + "_relu6";
    relu6_layer->setName(relu6_layer_name.c_str());
    auto relu6_output = relu6_layer->getOutput(0);

    // / 6
    nvinfer1::IScaleLayer* div6_layer = this->network->addScale(*relu6_output, nvinfer1::ScaleMode::kUNIFORM, div6_shift_param, div6_scale_param, div6_power_param);
    std::string div6_layer_name = std::string(node->name) + "_div6";
    div6_layer->setName(div6_layer_name.c_str());
    // Fix: take the output of div6_layer (previously relu6_layer's output was
    // used, silently skipping the division by 6).
    auto div6_output = div6_layer->getOutput(0);

    // x * ReLU6(x + 3) / 6
    nvinfer1::IElementWiseLayer* product_layer = this->network->addElementWise(*trt_tensor, *div6_output, nvinfer1::ElementWiseOperation::kPROD);
    std::string product_layer_name = std::string(node->name) + "_dot";
    product_layer->setName(product_layer_name.c_str());
    this->layer_map[node->index] = product_layer;

    // Fix: publish the product layer's output (previously relu6_layer's
    // output was registered, so downstream nodes consumed the wrong tensor).
    auto product_output = product_layer->getOutput(0);
    this->SetRange(output, product_output);
    this->tensor_real_map[output->index] = product_output;
    this->tensor_swap_map[output->index] = output->index;

    return true;
}
......@@ -52,7 +52,7 @@ bool TensorRTEngine::AddInterpNode(struct graph* ir_graph, struct node* node)
nvinfer1::IResizeLayer* layer = this->network->addResize(*interp_input_tensor);
if (nullptr == layer)
{
fprintf(stderr, "Tengine: Add Flatten(id: %d, name: %s) layer failed.\n", node->index, node->name);
fprintf(stderr, "Tengine: Add Interp(id: %d, name: %s) layer failed.\n", node->index, node->name);
return false;
}
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: lswang@openailab.com
*/
#include "../trt_executor.hpp"
/*
* y = x * tanh ( ln(1 + e^x) )
* = x * ( (1 + e^x)^2 - 1 ) / ( (1 + e^x)^2 + 1 )
*/
/*
 * Map a Mish node onto TensorRT layers.
 *
 *   Mish(x) = x * tanh( ln(1 + e^x) )
 *           = x * ( (1 + e^x)^2 - 1 ) / ( (1 + e^x)^2 + 1 )
 *
 * Built as: Unary kEXP -> Scale (s = (1 + e^x)^2) -> two Scales (s - 1 and
 * s + 1) -> ElementWise kDIV -> ElementWise kPROD with x.
 * Returns true on success, false (with a message on stderr) otherwise.
 */
bool TensorRTEngine::AddMishNode(struct graph* ir_graph, struct node* node)
{
    struct tensor* input = get_ir_graph_tensor(ir_graph, node->input_tensors[0]);
    struct tensor* output = get_ir_graph_tensor(ir_graph, node->output_tensors[0]);
    if (nullptr == input || nullptr == output)
    {
        fprintf(stderr, "Tengine: Get input & output for Mish(id: %d, name: %s) layer failed.\n", node->index, node->name);
        return false;
    }
    if (!check_if_input_in_map(input->index, this->tensor_swap_map))
    {
        fprintf(stderr, "Tengine: Query input for Mish(id: %d, name: %s) layer failed.\n", node->index, node->name);
        return false;
    }

    nvinfer1::ITensor* input_tensor = tensor_real_map[tensor_swap_map[input->index]];

    // Scale-layer constants. They must be kFLOAT for an FP32 network (the
    // previous int8_t/kINT8 weights were misread by TensorRT), and TensorRT
    // keeps raw pointers into Weights until the engine is built -> static.
    static const float pos_one = 1.f, neg_one = -1.f, two = 2.f;
    nvinfer1::Weights pos_one_param{nvinfer1::DataType::kFLOAT, &pos_one, 1};
    nvinfer1::Weights neg_one_param{nvinfer1::DataType::kFLOAT, &neg_one, 1};
    nvinfer1::Weights two_param{nvinfer1::DataType::kFLOAT, &two, 1};

    // get e^x
    nvinfer1::IUnaryLayer* ex_layer = this->network->addUnary(*input_tensor, nvinfer1::UnaryOperation::kEXP);
    std::string ex_layer_name = std::string(node->name) + "_ex";
    ex_layer->setName(ex_layer_name.c_str());
    auto ex_output = ex_layer->getOutput(0);

    // get s = (1 + e^x)^2; addScale computes (scale * x + shift) ^ power
    nvinfer1::IScaleLayer* ex_scaled_layer = this->network->addScale(*ex_output, nvinfer1::ScaleMode::kUNIFORM, pos_one_param, pos_one_param, two_param);
    std::string ex_scaled_layer_name = std::string(node->name) + "_scale";
    ex_scaled_layer->setName(ex_scaled_layer_name.c_str());
    auto ex_scaled_output = ex_scaled_layer->getOutput(0);

    // get numerator = s - 1 and denominator = s + 1.
    // Fix: the previous shifts/scales computed (s + 1) and (1 - s), i.e.
    // -1 / tanh(ln(1 + e^x)) instead of the formula in the header comment.
    nvinfer1::IScaleLayer* numerator_layer = this->network->addScale(*ex_scaled_output, nvinfer1::ScaleMode::kUNIFORM, neg_one_param, pos_one_param, pos_one_param);
    nvinfer1::IScaleLayer* denominator_layer = this->network->addScale(*ex_scaled_output, nvinfer1::ScaleMode::kUNIFORM, pos_one_param, pos_one_param, pos_one_param);
    std::string numerator_layer_name = std::string(node->name) + "_numerator";
    std::string denominator_layer_name = std::string(node->name) + "_denominator";
    numerator_layer->setName(numerator_layer_name.c_str());
    denominator_layer->setName(denominator_layer_name.c_str());
    auto numerator_output = numerator_layer->getOutput(0);
    auto denominator_output = denominator_layer->getOutput(0);

    // get tanh(ln(1 + e^x)) = (s - 1) / (s + 1)
    nvinfer1::IElementWiseLayer* fraction_layer = this->network->addElementWise(*numerator_output, *denominator_output, nvinfer1::ElementWiseOperation::kDIV);
    std::string fraction_layer_name = std::string(node->name) + "_fraction";
    fraction_layer->setName(fraction_layer_name.c_str());
    auto fraction_output = fraction_layer->getOutput(0);

    // get x * (s - 1) / (s + 1)
    nvinfer1::IElementWiseLayer* product_layer = this->network->addElementWise(*input_tensor, *fraction_output, nvinfer1::ElementWiseOperation::kPROD);
    std::string product_layer_name = std::string(node->name) + "_product";
    product_layer->setName(product_layer_name.c_str());
    auto product_output = product_layer->getOutput(0);

    this->layer_map[node->index] = product_layer;
    this->SetRange(output, product_output);
    this->tensor_real_map[output->index] = product_output;
    this->tensor_swap_map[output->index] = output->index;

    return true;
}
......@@ -37,14 +37,14 @@ bool TensorRTEngine::AddSliceNode(struct graph* ir_graph, struct node* node)
struct tensor* slice_output = get_ir_graph_tensor(ir_graph, node->output_tensors[0]);
if (nullptr == slice_input || nullptr == slice_output)
{
fprintf(stderr, "Tengine: Get input & output for Flatten(id: %d, name: %s) layer failed.\n", node->index,
fprintf(stderr, "Tengine: Get input & output for Slice(id: %d, name: %s) layer failed.\n", node->index,
node->name);
return false;
}
if (!check_if_input_in_map(slice_input->index, this->tensor_swap_map))
{
fprintf(stderr, "Tengine: Query input for Flatten(id: %d, name: %s) layer failed.\n", node->index, node->name);
fprintf(stderr, "Tengine: Query input for Slice(id: %d, name: %s) layer failed.\n", node->index, node->name);
return false;
}
......@@ -79,7 +79,7 @@ bool TensorRTEngine::AddSliceNode(struct graph* ir_graph, struct node* node)
nvinfer1::ISliceLayer* layer = this->network->addSlice(*trt_tensor, start, size, stride);
if (nullptr == layer)
{
fprintf(stderr, "Tengine: Add Flatten(id: %d, name: %s) layer failed.\n", node->index, node->name);
fprintf(stderr, "Tengine: Add Slice(id: %d, name: %s) layer failed.\n", node->index, node->name);
return false;
}
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (c) 2021, Open AI Lab
* Author: lswang@openailab.com
*/
#include "../trt_executor.hpp"
#include <NvInferRuntime.h>
EXPORT_BEGIN
#include "transpose_param.h"
EXPORT_FINISH
/*
 * Map a Transpose node onto a TensorRT shuffle layer.
 *
 * The axis permutation comes from transpose_param->tr_shape. The shuffle is
 * configured with both a first-transpose and a reshape to the IR output dims
 * (which the IR has already permuted), so the reshape is effectively a no-op
 * that pins the output shape.
 * Returns true on success, false (with a message on stderr) otherwise.
 */
bool TensorRTEngine::AddTranspose(struct graph *ir_graph, struct node *node)
{
    struct tensor* input = get_ir_graph_tensor(ir_graph, node->input_tensors[0]);
    struct tensor* output = get_ir_graph_tensor(ir_graph, node->output_tensors[0]);
    if (nullptr == input || nullptr == output)
    {
        fprintf(stderr, "Tengine: Get input & output for Transpose(id: %d, name: %s) layer failed.\n", node->index, node->name);
        return false;
    }
    // The producer of the input tensor must already be registered with TRT.
    if (!check_if_input_in_map(input->index, this->tensor_swap_map))
    {
        fprintf(stderr, "Tengine: Query input for Transpose(id: %d, name: %s) layer failed.\n", node->index, node->name);
        return false;
    }
    auto transpose_param = (struct transpose_param*)node->op.param_mem;
    if (nullptr == transpose_param || transpose_param->tr_shape_size <= 0)
    {
        fprintf(stderr, "Tengine: TensorRT get transpose param failed.\n");
        return false;
    }
    nvinfer1::ITensor* input_tensor = tensor_real_map[tensor_swap_map[input->index]];
    nvinfer1::IShuffleLayer* layer = this->network->addShuffle(*input_tensor);
    if (nullptr == layer)
    {
        fprintf(stderr, "Tengine: Add Transpose(id: %d, name: %s) layer failed.\n", node->index, node->name);
        return false;
    }
    layer->setName(node->name);
    // Reshape target = the IR output shape.
    nvinfer1::Dims dims{};
    dims.nbDims = output->dim_num;
    for (int i = 0; i < dims.nbDims; i++)
        dims.d[i] = output->dims[i];
    // Build the axis permutation; trailing slots beyond tr_shape_size are
    // filled with 0 (only the first nbDims entries are meaningful to TRT).
    nvinfer1::Permutation order = { 0 };
    for (int i = 0; i < transpose_param->tr_shape_size; i++)
    {
        order.order[i] = transpose_param->tr_shape[i];
    }
    for (int i = transpose_param->tr_shape_size; i < nvinfer1::Dims::MAX_DIMS; i++)
    {
        order.order[i] = 0;
    }
    // Take 0 in the reshape dims literally (not "copy input dim"); the
    // shuffle applies the first-transpose before the reshape.
    layer->setZeroIsPlaceholder(false);
    layer->setReshapeDimensions(dims);
    layer->setFirstTranspose(order);
    this->layer_map[node->index] = layer;
    nvinfer1::ITensor* output_tensor = layer->getOutput(0);
    this->SetRange(output, output_tensor);
    // NOTE(review): node->output_tensors[0] is presumably the same value as
    // output->index (other Add* handlers key these maps by output->index) —
    // confirm the two are interchangeable.
    this->tensor_real_map[node->output_tensors[0]] = output_tensor;
    this->tensor_swap_map[node->output_tensors[0]] = node->output_tensors[0];
    return true;
}
......@@ -31,7 +31,7 @@ EXPORT_FINISH
#include <NvInferRuntime.h>
bool TensorRTEngine::AddUpsampleNode(struct graph* ir_graph, struct node* node)
bool TensorRTEngine::AddUpSampleNode(struct graph* ir_graph, struct node* node)
{
struct tensor* upsample_input = get_ir_graph_tensor(ir_graph, node->input_tensors[0]);
struct tensor* upsample_output = get_ir_graph_tensor(ir_graph, node->output_tensors[0]);
......
......@@ -139,7 +139,7 @@ int TensorRTEngine::Build(struct subgraph* subgraph)
{
if(!AddTensor(ir_graph, ir_tensor))
{
TLOG_ERR("Cannot add input tensor(id: %d, name: %s) from node(id: %d, name: %s).\n", ir_tensor->index, ir_tensor->name, ir_node->index, ir_node->name);
TLOG_ERR("Tengine: Cannot add input tensor(id: %d, name: %s) from node(id: %d, name: %s).\n", ir_tensor->index, ir_tensor->name, ir_node->index, ir_node->name);
return -5;
}
}
......@@ -154,6 +154,20 @@ int TensorRTEngine::Build(struct subgraph* subgraph)
switch (op_type)
{
case OP_ABSVAL:
if (!AddAbsVal(ir_graph, ir_node))
{
TLOG_ERR("Tengine: Cannot add AbsVal op(%d).\n", ir_node->index);
return -6;
}
break;
case OP_ADD_N:
if (!AddAddN(ir_graph, ir_node))
{
TLOG_ERR("Tengine: Cannot add AddN op(%d).\n", ir_node->index);
return -6;
}
break;
case OP_BATCHNORM:
if (!AddBatchNormNode(ir_graph, ir_node))
{
......@@ -224,6 +238,17 @@ int TensorRTEngine::Build(struct subgraph* subgraph)
}
break;
}
case OP_HARDSWISH:
{
if (!AddHardSwishNode(ir_graph, ir_node))
{
TLOG_ERR("Tengine: Cannot add HardSwish op(%d).\n", ir_node->index);
return -6;
}
break;
}
case OP_INPUT:
continue;
case OP_INTERP: {
if (!AddInterpNode(ir_graph, ir_node))
{
......@@ -232,6 +257,15 @@ int TensorRTEngine::Build(struct subgraph* subgraph)
}
break;
}
case OP_MISH:
{
if (!AddMishNode(ir_graph, ir_node))
{
TLOG_ERR("Tengine: Cannot add Mish op(%d).\n", ir_node->index);
return -6;
}
break;
}
case OP_PERMUTE: {
if (!AddPermuteNode(ir_graph, ir_node))
{
......@@ -274,8 +308,6 @@ int TensorRTEngine::Build(struct subgraph* subgraph)
return -6;
}
}
case OP_INPUT:
continue;
case OP_SOFTMAX:
{
if(!AddSoftmaxNode(ir_graph, ir_node))
......@@ -285,9 +317,18 @@ int TensorRTEngine::Build(struct subgraph* subgraph)
}
break;
}
case OP_TRANSPOSE:
{
if(!AddTranspose(ir_graph, ir_node))
{
TLOG_ERR("Tengine: Cannot add Softmax op(%d).\n", ir_node->index);
return -6;
}
break;
}
case OP_UPSAMPLE:
{
if(!AddUpsampleNode(ir_graph, ir_node))
if(!AddUpSampleNode(ir_graph, ir_node))
{
TLOG_ERR("Tengine: Cannot add Upsample op(%d).\n", ir_node->index);
return -6;
......@@ -347,9 +388,21 @@ bool TensorRTEngine::AddTensor(struct graph* ir_graph, struct tensor *ir_tensor)
trt_tensor = this->network->addInput(ir_tensor->name, nvinfer1::DataType::kFLOAT, dim4);
break;
}
case 5:
{
nvinfer1::Dims dim5;
dim5.nbDims = 5;
dim5.d[0] = dims[0];
dim5.d[1] = dims[1];
dim5.d[2] = dims[2];
dim5.d[3] = dims[3];
dim5.d[4] = dims[4];
trt_tensor = this->network->addInput(ir_tensor->name, nvinfer1::DataType::kFLOAT, dim5);
break;
}
default:
{
TLOG_ERR("Tengine: Tensor data type(%d) cannot supported.\n", ir_tensor->data_type);
TLOG_ERR("Tengine: Tensor dimension(%d) cannot supported.\n", dim);
return false;
}
}
......
......@@ -82,6 +82,8 @@ private:
private:
bool AddTensor(struct graph* ir_graph, struct tensor* ir_tensor);
bool AddAbsVal(struct graph* ir_graph, struct node* node);
bool AddAddN(struct graph* ir_graph, struct node* node);
bool AddBatchNormNode(struct graph* ir_graph, struct node* node);
bool AddConcatNode(struct graph* ir_graph, struct node* node);
bool AddConvolutionNode(struct graph* ir_graph, struct node* node);
......@@ -91,14 +93,17 @@ private:
bool AddEltwiseLayer(struct graph* ir_graph, struct node* node);
bool AddFlattenNode(struct graph* ir_graph, struct node* node);
bool AddFullyConnectedNode(struct graph* ir_graph, struct node* node);
bool AddHardSwishNode(struct graph* ir_graph, struct node* node);
bool AddInterpNode(struct graph* ir_graph, struct node* node);
bool AddMishNode(struct graph* ir_graph, struct node* node);
bool AddPermuteNode(struct graph* ir_graph, struct node* node);
bool AddPoolingNode(struct graph* ir_graph, struct node* node);
bool addReLUNode(struct graph* ir_graph, struct node* node);
bool AddReshapeNode(struct graph* ir_graph, struct node* node);
bool AddTranspose(struct graph* ir_graph, struct node* node);
bool AddSliceNode(struct graph* ir_graph, struct node* node);
bool AddSoftmaxNode(struct graph* ir_graph, struct node* node);
bool AddUpsampleNode(struct graph* ir_graph, struct node* node);
bool AddUpSampleNode(struct graph* ir_graph, struct node* node);
private:
nvinfer1::IBuilder* builder;
......
......@@ -89,7 +89,7 @@ const int trt_supported_ops[] = {
OP_GRU,
#endif
OP_HARDSIGMOID,
//OP_HARDSWISH, // Not supported, last checked version 7.1.3
OP_HARDSWISH, // supported: lowered to Scale + Activation + ElementWise layers
OP_INPUT,
OP_INSTANCENORM,
OP_INTERP, // should be as UpSample
......@@ -157,7 +157,7 @@ const int trt_supported_ops[] = {
OP_UNSQUEEZE,
OP_UPSAMPLE,
//OP_ZEROSLIKE, // Not supported, last checked version 7.1.3
//OP_MISH,
OP_MISH,
OP_LOGSOFTMAX,
#if NV_TENSORRT_MAJOR >= 6
OP_RELU1,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册