Unverified  Commit 57df4695 authored by kalcohol, committed by GitHub

fix trt mish and hardswish op buffer issue (#681)

* fix param buffer issue

* remove debug code
Parent f3943f73
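
Note on the bug being fixed: nvinfer1::Weights stores only a raw pointer; TensorRT does not copy the values, and the memory must stay valid until the engine is built. The old code pointed Weights at function-local stack variables (and tagged scale parameters as kINT8 while backing them with the wrong-width integers), so the parameter buffers were dangling by build time. The commit switches to heap allocation and parks each pointer in this->host_buffer so it outlives network construction. A minimal sketch of the two patterns, assuming a std::vector<void*> host_buffer like Tengine's; the helper names here are illustrative, not the project's API:

// Sketch only: illustrates the Weights lifetime bug, not Tengine's actual helpers.
#include <NvInfer.h>
#include <cstdlib>
#include <vector>

// BAD (old pattern): 'scale' is a stack local. The layer keeps a Weights
// whose pointer dangles as soon as this function returns, so the engine
// build later reads dead stack memory.
nvinfer1::IScaleLayer* add_scale_bad(nvinfer1::INetworkDefinition* net, nvinfer1::ITensor& in)
{
    float scale = 1.f / 6.f;
    nvinfer1::Weights w{nvinfer1::DataType::kFLOAT, &scale, 1};
    return net->addScale(in, nvinfer1::ScaleMode::kUNIFORM, w, w, w);
}

// GOOD (the commit's pattern): heap-allocate the parameters and keep the
// pointer in a buffer list owned by the engine wrapper, freed only after
// the TensorRT engine has been built.
nvinfer1::IScaleLayer* add_scale_good(nvinfer1::INetworkDefinition* net, nvinfer1::ITensor& in,
                                      std::vector<void*>& host_buffer)
{
    float* p = (float*)malloc(sizeof(float));
    *p = 1.f / 6.f;
    host_buffer.push_back(p); // stays alive through network construction
    nvinfer1::Weights w{nvinfer1::DataType::kFLOAT, p, 1};
    return net->addScale(in, nvinfer1::ScaleMode::kUNIFORM, w, w, w);
}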
@@ -24,7 +24,6 @@
 #include "../trt_executor.hpp"
 
 bool TensorRTEngine::AddHardSwishNode(struct graph* ir_graph, struct node* node)
 {
     struct tensor* input = get_ir_graph_tensor(ir_graph, node->input_tensors[0]);
@@ -42,50 +41,35 @@ bool TensorRTEngine::AddHardSwishNode(struct graph* ir_graph, struct node* node)
         return false;
     }
 
-    uint8_t add3_scale = 1, add3_shift = 3, add3_power = 1;
-    float div6_scale = 1 / 6.f, div6_shift = 0.f, div6_power = 1.f;
-
-    nvinfer1::ITensor* trt_tensor = tensor_real_map[tensor_swap_map[input->index]];
-
-    nvinfer1::Weights add3_scale_param{nvinfer1::DataType::kINT8, &add3_scale, 1};
-    nvinfer1::Weights add3_shift_param{nvinfer1::DataType::kINT8, &add3_shift, 1};
-    nvinfer1::Weights add3_power_param{nvinfer1::DataType::kINT8, &add3_power, 1};
-    nvinfer1::Weights div6_scale_param{nvinfer1::DataType::kFLOAT, &div6_scale, 1};
-    nvinfer1::Weights div6_shift_param{nvinfer1::DataType::kFLOAT, &div6_shift, 1};
-    nvinfer1::Weights div6_power_param{nvinfer1::DataType::kFLOAT, &div6_power, 1};
-
-    nvinfer1::IScaleLayer* add3_layer = this->network->addScale(*trt_tensor, nvinfer1::ScaleMode::kUNIFORM, add3_shift_param, add3_scale_param, add3_power_param);
-    std::string add3_layer_name = std::string(node->name) + "_add3";
-    add3_layer->setName(add3_layer_name.c_str());
-    auto add3_output = add3_layer->getOutput(0);
-
-    nvinfer1::IActivationLayer* relu6_layer = this->network->addActivation(*add3_output, nvinfer1::ActivationType::kRELU);
-    relu6_layer->setAlpha(6);
-    relu6_layer->setBeta(0);
-    std::string relu6_layer_name = std::string(node->name) + "_relu6";
-    relu6_layer->setName(relu6_layer_name.c_str());
-    auto relu6_output = relu6_layer->getOutput(0);
-
-    nvinfer1::IScaleLayer* div6_layer = this->network->addScale(*relu6_output, nvinfer1::ScaleMode::kUNIFORM, div6_shift_param, div6_scale_param, div6_power_param);
-    std::string div6_layer_name = std::string(node->name) + "_div6";
-    div6_layer->setName(div6_layer_name.c_str());
-    auto div6_output = relu6_layer->getOutput(0);
-
-    nvinfer1::IElementWiseLayer* product_layer = this->network->addElementWise(*trt_tensor, *div6_output, nvinfer1::ElementWiseOperation::kPROD);
-    std::string product_layer_name = std::string(node->name) + "_dot";
-    product_layer->setName(product_layer_name.c_str());
-
-    this->layer_map[node->index] = product_layer;
-
-    auto product_output = relu6_layer->getOutput(0);
+    nvinfer1::ITensor* input_tensor = tensor_real_map[tensor_swap_map[input->index]];
+
+    float* param_buffer = (float*)sys_malloc(3 * sizeof(float));
+    this->host_buffer.push_back(param_buffer);
+    param_buffer[0] = 1.f / 6.f, param_buffer[1] = 0.5f, param_buffer[2] = 1.f;
+
+    nvinfer1::Weights lambda_scale{nvinfer1::DataType::kFLOAT, &(param_buffer[0]), 1};
+    nvinfer1::Weights lambda_shift{nvinfer1::DataType::kFLOAT, &(param_buffer[1]), 1};
+    nvinfer1::Weights lambda_power{nvinfer1::DataType::kFLOAT, &(param_buffer[2]), 1};
+
+    nvinfer1::IScaleLayer* scale_layer = this->network->addScale(*input_tensor, nvinfer1::ScaleMode::kUNIFORM, lambda_shift, lambda_scale, lambda_power);
+    std::string scale_layer_name = std::string(node->name) + "_scale";
+    scale_layer->setName(scale_layer_name.c_str());
+    auto scale_layer_output = scale_layer->getOutput(0);
+
+    nvinfer1::IActivationLayer* relu1_layer = this->network->addActivation(*scale_layer_output, nvinfer1::ActivationType::kCLIP);
+    relu1_layer->setAlpha(0.f);
+    relu1_layer->setBeta(1.f);
+    std::string relu1_layer_name = std::string(node->name) + "_relu1";
+    relu1_layer->setName(relu1_layer_name.c_str());
+    auto relu1_output = relu1_layer->getOutput(0);
+
+    nvinfer1::IElementWiseLayer* product_Layer = this->network->addElementWise(*input_tensor, *relu1_output, nvinfer1::ElementWiseOperation::kPROD);
+    product_Layer->setName(node->name);
+
+    this->layer_map[node->index] = product_Layer;
+
+    auto product_output = product_Layer->getOutput(0);
 
     this->SetRange(output, product_output);
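
For the record, the new layer graph computes the same function as the deleted add3/relu6/div6 chain: hard_swish(x) = x * ReLU6(x + 3) / 6 = x * clip(x/6 + 0.5, 0, 1). The single scale layer applies (x * 1/6 + 0.5)^1 (TensorRT's IScaleLayer computes (x * scale + shift)^power), the kCLIP activation bounds it to [0, 1], and the element-wise product multiplies by the input. A scalar sketch of the equivalence:

#include <algorithm>
#include <cassert>
#include <cmath>

// Old formulation: x * ReLU6(x + 3) / 6.
static float hardswish_relu6(float x)
{
    float r = std::min(std::max(x + 3.f, 0.f), 6.f); // ReLU6(x + 3)
    return x * r / 6.f;
}

// New formulation used by the commit's layer graph:
// one scale (scale = 1/6, shift = 0.5, power = 1), then CLIP to [0, 1].
static float hardswish_clip(float x)
{
    float s = x * (1.f / 6.f) + 0.5f;
    return x * std::min(std::max(s, 0.f), 1.f);
}

int main()
{
    for (float x = -8.f; x <= 8.f; x += 0.25f)
        assert(std::fabs(hardswish_relu6(x) - hardswish_clip(x)) < 1e-6f);
    return 0;
}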
@@ -56,10 +56,14 @@ bool TensorRTEngine::AddMishNode(struct graph* ir_graph, struct node* node)
     auto ex_output = ex_layer->getOutput(0);
 
+    float* param_buffer = (float*)sys_malloc(3 * sizeof(float));
+    this->host_buffer.push_back(param_buffer);
+    param_buffer[0] = 1.f, param_buffer[1] = -1.f, param_buffer[2] = 2.f;
+
     // get (1 + e^x)^2
-    int8_t ex_pos_1 = 1, ex_neg_1 = -1, ex_2 = 2;
-    nvinfer1::Weights ex_pos_1_param{nvinfer1::DataType::kINT8, &ex_pos_1, 1};
-    nvinfer1::Weights ex_2_param{nvinfer1::DataType::kINT8, &ex_2, 1};
+    nvinfer1::Weights ex_pos_1_param{nvinfer1::DataType::kFLOAT, &param_buffer[0], 1};
+    nvinfer1::Weights ex_2_param{nvinfer1::DataType::kFLOAT, &param_buffer[2], 1};
 
     nvinfer1::IScaleLayer* ex_scaled_layer = this->network->addScale(*ex_output, nvinfer1::ScaleMode::kUNIFORM, ex_pos_1_param, ex_pos_1_param, ex_2_param);
     std::string ex_scaled_layer_name = std::string(node->name) + "_scale";
@@ -68,7 +72,7 @@ bool TensorRTEngine::AddMishNode(struct graph* ir_graph, struct node* node)
     auto ex_scaled_output = ex_scaled_layer->getOutput(0);
 
     // get (1 + e^x)^2 + 1, (1 + e^x)^2 - 1
-    nvinfer1::Weights ex_neg_1_param{nvinfer1::DataType::kINT8, &ex_neg_1, 1};
+    nvinfer1::Weights ex_neg_1_param{nvinfer1::DataType::kFLOAT, &param_buffer[1], 1};
     nvinfer1::IScaleLayer* numerator_layer = this->network->addScale(*ex_scaled_output, nvinfer1::ScaleMode::kUNIFORM, ex_pos_1_param, ex_pos_1_param, ex_pos_1_param);
     nvinfer1::IScaleLayer* denominator_layer = this->network->addScale(*ex_scaled_output, nvinfer1::ScaleMode::kUNIFORM, ex_pos_1_param, ex_neg_1_param, ex_pos_1_param);
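
These scale layers assemble mish algebraically rather than via log/tanh: with t = (1 + e^x)^2 from ex_scaled_layer (power = 2), the identity tanh(softplus(x)) = tanh(ln(1 + e^x)) = (t - 1) / (t + 1) holds, and the two addScale calls above produce the (1 + e^x)^2 ± 1 terms named in the source comment. A scalar sketch checking the identity:

#include <cassert>
#include <cmath>

// Reference mish: x * tanh(softplus(x)) = x * tanh(ln(1 + e^x)).
static float mish_ref(float x)
{
    return x * std::tanh(std::log1p(std::exp(x)));
}

// The commit's layer algebra: with t = (1 + e^x)^2,
// tanh(ln(1 + e^x)) = (t - 1) / (t + 1), so no log/tanh pair is needed.
static float mish_rational(float x)
{
    float t = std::pow(1.f + std::exp(x), 2.f); // ex_scaled_layer: power = 2
    return x * (t - 1.f) / (t + 1.f);           // the (t ± 1) terms above
}

int main()
{
    for (float x = -6.f; x <= 6.f; x += 0.25f)
        assert(std::fabs(mish_ref(x) - mish_rational(x)) < 1e-5f);
    return 0;
}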