diff --git a/source/device/tensorrt/op/trt_hardswish.cc b/source/device/tensorrt/op/trt_hardswish.cc index fec6c914668a5ab92bbad8a826b2fdf4d00b343e..1d8c1c957ec5957fd7db0c5f365ae9a268053598 100644 --- a/source/device/tensorrt/op/trt_hardswish.cc +++ b/source/device/tensorrt/op/trt_hardswish.cc @@ -24,7 +24,6 @@ #include "../trt_executor.hpp" - bool TensorRTEngine::AddHardSwishNode(struct graph* ir_graph, struct node* node) { struct tensor* input = get_ir_graph_tensor(ir_graph, node->input_tensors[0]); @@ -42,50 +41,35 @@ bool TensorRTEngine::AddHardSwishNode(struct graph* ir_graph, struct node* node) return false; } - uint8_t add3_scale = 1, add3_shift = 3, add3_power = 1; - float div6_scale = 1 / 6.f, div6_shift = 0.f, div6_power = 1.f; - - nvinfer1::ITensor* trt_tensor = tensor_real_map[tensor_swap_map[input->index]]; - - nvinfer1::Weights add3_scale_param{nvinfer1::DataType::kINT8, &add3_scale, 1}; - nvinfer1::Weights add3_shift_param{nvinfer1::DataType::kINT8, &add3_shift, 1}; - nvinfer1::Weights add3_power_param{nvinfer1::DataType::kINT8, &add3_power, 1}; - - nvinfer1::Weights div6_scale_param{nvinfer1::DataType::kFLOAT, &div6_scale, 1}; - nvinfer1::Weights div6_shift_param{nvinfer1::DataType::kFLOAT, &div6_shift, 1}; - nvinfer1::Weights div6_power_param{nvinfer1::DataType::kFLOAT, &div6_power, 1}; - - nvinfer1::IScaleLayer* add3_layer = this->network->addScale(*trt_tensor, nvinfer1::ScaleMode::kUNIFORM, add3_shift_param, add3_scale_param, add3_power_param); - - std::string add3_layer_name = std::string(node->name) + "_add3"; - add3_layer->setName(add3_layer_name.c_str()); - - auto add3_output = add3_layer->getOutput(0); - - nvinfer1::IActivationLayer* relu6_layer = this->network->addActivation(*add3_output, nvinfer1::ActivationType::kRELU); - relu6_layer->setAlpha(6); - relu6_layer->setBeta(0); + nvinfer1::ITensor* input_tensor = tensor_real_map[tensor_swap_map[input->index]]; - std::string relu6_layer_name = std::string(node->name) + "_relu6"; - 
relu6_layer->setName(relu6_layer_name.c_str()); + float* param_buffer = (float*)sys_malloc(3 * sizeof(float)); + this->host_buffer.push_back(param_buffer); - auto relu6_output = relu6_layer->getOutput(0); + param_buffer[0] = 1.f / 6.f, param_buffer[1] = 0.5f, param_buffer[2] = 1.f; + nvinfer1::Weights lambda_scale{nvinfer1::DataType::kFLOAT, &(param_buffer[0]), 1}; + nvinfer1::Weights lambda_shift{nvinfer1::DataType::kFLOAT, &(param_buffer[1]), 1}; + nvinfer1::Weights lambda_power{nvinfer1::DataType::kFLOAT, &(param_buffer[2]), 1}; - nvinfer1::IScaleLayer* div6_layer = this->network->addScale(*relu6_output, nvinfer1::ScaleMode::kUNIFORM, div6_shift_param, div6_scale_param, div6_power_param); + nvinfer1::IScaleLayer* scale_layer = this->network->addScale(*input_tensor, nvinfer1::ScaleMode::kUNIFORM, lambda_shift, lambda_scale, lambda_power); + std::string scale_layer_name = std::string(node->name) + "_scale"; + scale_layer->setName(scale_layer_name.c_str()); - std::string div6_layer_name = std::string(node->name) + "_div6"; - div6_layer->setName(div6_layer_name.c_str()); + auto scale_layer_output = scale_layer->getOutput(0); - auto div6_output = relu6_layer->getOutput(0); + nvinfer1::IActivationLayer* relu1_layer = this->network->addActivation(*scale_layer_output, nvinfer1::ActivationType::kCLIP); + relu1_layer->setAlpha(0.f); + relu1_layer->setBeta(1.f); - nvinfer1::IElementWiseLayer* product_layer = this->network->addElementWise(*trt_tensor, *div6_output, nvinfer1::ElementWiseOperation::kPROD); + std::string relu1_layer_name = std::string(node->name) + "_relu1"; + relu1_layer->setName(relu1_layer_name.c_str()); - std::string product_layer_name = std::string(node->name) + "_dot"; - product_layer->setName(product_layer_name.c_str()); + auto relu1_output = relu1_layer->getOutput(0); - this->layer_map[node->index] = product_layer; + nvinfer1::IElementWiseLayer* product_Layer = this->network->addElementWise(*input_tensor, *relu1_output, 
nvinfer1::ElementWiseOperation::kPROD); + product_Layer->setName(node->name); - auto product_output = relu6_layer->getOutput(0); + auto product_output = product_Layer->getOutput(0); this->SetRange(output, product_output); diff --git a/source/device/tensorrt/op/trt_mish.cc b/source/device/tensorrt/op/trt_mish.cc index 93f8c37522e9d235d2bd3e0728c1b6724110a6c4..697ecc8c894830c5d97f75027077f7bcffe12fc2 100644 --- a/source/device/tensorrt/op/trt_mish.cc +++ b/source/device/tensorrt/op/trt_mish.cc @@ -56,10 +56,14 @@ bool TensorRTEngine::AddMishNode(struct graph* ir_graph, struct node* node) auto ex_output = ex_layer->getOutput(0); + float* param_buffer = (float*)sys_malloc(3 * sizeof(float)); + this->host_buffer.push_back(param_buffer); + + param_buffer[0] = 1.f, param_buffer[1] = -1.f, param_buffer[2] = 2.f; + // get (1 + e^x)^2 - int8_t ex_pos_1 = 1, ex_neg_1 = -1, ex_2 = 2; - nvinfer1::Weights ex_pos_1_param{nvinfer1::DataType::kINT8, &ex_pos_1, 1}; - nvinfer1::Weights ex_2_param{nvinfer1::DataType::kINT8, &ex_2, 1}; + nvinfer1::Weights ex_pos_1_param{nvinfer1::DataType::kFLOAT, &param_buffer[0], 1}; + nvinfer1::Weights ex_2_param{nvinfer1::DataType::kFLOAT, &param_buffer[2], 1}; nvinfer1::IScaleLayer* ex_scaled_layer = this->network->addScale(*ex_output, nvinfer1::ScaleMode::kUNIFORM, ex_pos_1_param, ex_pos_1_param, ex_2_param); std::string ex_scaled_layer_name = std::string(node->name) + "_scale"; @@ -68,7 +72,7 @@ bool TensorRTEngine::AddMishNode(struct graph* ir_graph, struct node* node) auto ex_scaled_output = ex_scaled_layer->getOutput(0); // get (1 + e^x)^2 + 1, (1 + e^x)^2 - 1 - nvinfer1::Weights ex_neg_1_param{nvinfer1::DataType::kINT8, &ex_neg_1, 1}; + nvinfer1::Weights ex_neg_1_param{nvinfer1::DataType::kFLOAT, &param_buffer[1], 1}; nvinfer1::IScaleLayer* numerator_layer = this->network->addScale(*ex_scaled_output, nvinfer1::ScaleMode::kUNIFORM, ex_pos_1_param, ex_pos_1_param, ex_pos_1_param); nvinfer1::IScaleLayer* denominator_layer = 
this->network->addScale(*ex_scaled_output, nvinfer1::ScaleMode::kUNIFORM, ex_pos_1_param, ex_neg_1_param, ex_pos_1_param);