Unverified commit a3689d8c authored by Leo Chen and committed by GitHub

[Paddle-TRT] Replace GeLU plugin with TensorRT built-in layer for TensorRT 7.0. (#38399)

* Replace GeLU plugin with TRT built-in layers for approximate GeLU

* Add TensorRT built-in layers for non-approximate GeLU
Parent 556f6eb0
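For reference, the two GeLU variants realized by the new layer graphs (the constants below match the weights created in the converter) are:

    approximate (tanh): gelu(x) = 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
    exact (erf):        gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2)))

Both graphs are assembled from TensorRT Constant, ElementWise, Activation, and Unary layers, so no plugin is needed on TensorRT 7.0 and later.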
......@@ -43,13 +43,142 @@ class GeluOpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert fluid gelu op to tensorrt gelu layer";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
int input_num = op_desc.Input("X").size();
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
nvinfer1::ILayer* layer = nullptr;
if (op_desc.HasAttr("approximate") &&
BOOST_GET_CONST(bool, op_desc.GetAttr("approximate"))) {
#if IS_TRT_VERSION_GE(7000)
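      // Tanh approximation: out = 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))),
      // built here from Constant, ElementWise, and Activation layers instead of the plugin.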
nvinfer1::Dims input_shape;
input_shape.nbDims = input->getDimensions().nbDims;
for (int i = 0; i < input_shape.nbDims; ++i) {
input_shape.d[i] = 1;
}
std::string out_name = op_desc.Output("Out").front();
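      // Create a one-element FP32 weight and hand the backing tensor to the engine
      // (SetWeights) so the buffer stays alive while TensorRT builds the network.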
auto create_weights = [&](float data, std::string type) -> float* {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
tmp_tensor->Resize({1});
auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
tmp_data[0] = data;
engine_->SetWeights(out_name + "_gelu_op_" + type,
std::move(tmp_tensor));
return tmp_data;
};
float* constant_pow = create_weights(3.0f, "constant_pow");
float* constant_multiply = create_weights(0.044715f, "constant_multiply");
float* constant_sqrt =
create_weights(0.79788456080286535587989211986876f, "constant_sqrt");
float* constant_one = create_weights(1.0f, "constant_one");
float* constant_half = create_weights(0.5f, "constant_half");
auto constant_layer_pow = TRT_ENGINE_ADD_LAYER(
engine_, Constant, input_shape,
nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
static_cast<void*>(constant_pow), 1});
auto constant_layer_multiply = TRT_ENGINE_ADD_LAYER(
engine_, Constant, input_shape,
nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
static_cast<void*>(constant_multiply), 1});
auto constant_layer_sqrt = TRT_ENGINE_ADD_LAYER(
engine_, Constant, input_shape,
nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
static_cast<void*>(constant_sqrt), 1});
auto constant_layer_one = TRT_ENGINE_ADD_LAYER(
engine_, Constant, input_shape,
nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
static_cast<void*>(constant_one), 1});
auto constant_layer_half = TRT_ENGINE_ADD_LAYER(
engine_, Constant, input_shape,
nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
static_cast<void*>(constant_half), 1});
auto layer_pow = TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *input, *constant_layer_pow->getOutput(0),
nvinfer1::ElementWiseOperation::kPOW);
auto layer_mul =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_pow->getOutput(0),
*constant_layer_multiply->getOutput(0),
nvinfer1::ElementWiseOperation::kPROD);
auto layer_add =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_mul->getOutput(0),
*input, nvinfer1::ElementWiseOperation::kSUM);
auto layer_sqrt =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_add->getOutput(0),
*constant_layer_sqrt->getOutput(0),
nvinfer1::ElementWiseOperation::kPROD);
auto layer_tanh =
TRT_ENGINE_ADD_LAYER(engine_, Activation, *layer_sqrt->getOutput(0),
nvinfer1::ActivationType::kTANH);
auto layer_one =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_tanh->getOutput(0),
*constant_layer_one->getOutput(0),
nvinfer1::ElementWiseOperation::kSUM);
auto layer_CDF =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_one->getOutput(0),
*constant_layer_half->getOutput(0),
nvinfer1::ElementWiseOperation::kPROD);
auto y =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_CDF->getOutput(0),
*input, nvinfer1::ElementWiseOperation::kPROD);
layer = y;
#else
      PADDLE_THROW(platform::errors::Fatal(
          "GeLU op with approximate=True requires TensorRT 7.0 or later. "
          "Please check your TensorRT version."));
#endif
} else {
#if IS_TRT_VERSION_GE(7000)
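      // Exact GeLU: out = 0.5 * x * (1 + erf(x / sqrt(2))),
      // built from Constant, ElementWise, and Unary (kERF) layers.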
nvinfer1::Dims input_shape;
input_shape.nbDims = input->getDimensions().nbDims;
for (int i = 0; i < input_shape.nbDims; ++i) {
input_shape.d[i] = 1;
}
std::string out_name = op_desc.Output("Out").front();
auto create_weights = [&](float data, std::string type) -> float* {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
tmp_tensor->Resize({1});
auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
tmp_data[0] = data;
engine_->SetWeights(out_name + "_gelu_op_" + type,
std::move(tmp_tensor));
return tmp_data;
};
float* constant_one = create_weights(1.0f, "constant_one");
float* constant_half = create_weights(0.5f, "constant_half");
float* constant_rsqrt2 =
create_weights(0.70710678118f, "constant_rsqrt2");
auto constant_layer_one = TRT_ENGINE_ADD_LAYER(
engine_, Constant, input_shape,
nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
static_cast<void*>(constant_one), 1});
auto constant_layer_half = TRT_ENGINE_ADD_LAYER(
engine_, Constant, input_shape,
nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
static_cast<void*>(constant_half), 1});
auto constant_layer_rsqrt2 = TRT_ENGINE_ADD_LAYER(
engine_, Constant, input_shape,
nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
static_cast<void*>(constant_rsqrt2), 1});
auto layer_mul = TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *input, *constant_layer_rsqrt2->getOutput(0),
nvinfer1::ElementWiseOperation::kPROD);
auto layer_erf =
TRT_ENGINE_ADD_LAYER(engine_, Unary, *layer_mul->getOutput(0),
nvinfer1::UnaryOperation::kERF);
auto layer_add =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_erf->getOutput(0),
*constant_layer_one->getOutput(0),
nvinfer1::ElementWiseOperation::kSUM);
auto layer_CDF =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_add->getOutput(0),
*constant_layer_half->getOutput(0),
nvinfer1::ElementWiseOperation::kPROD);
auto y =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_CDF->getOutput(0),
*input, nvinfer1::ElementWiseOperation::kPROD);
layer = y;
#else // if IS_TRT_VERSION_GE(7000)
int input_num = op_desc.Input("X").size();
if (engine_->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
bool with_fp16 =
......@@ -68,6 +197,8 @@ class GeluOpConverter : public OpConverter {
plugin::GeluPlugin* plugin = new plugin::GeluPlugin(with_fp16);
layer = engine_->AddPlugin(&input, input_num, plugin);
}
#endif // if IS_TRT_VERSION_GE(7000)
}
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "gelu", {output_name}, test_mode);
}
......
......@@ -1019,9 +1019,12 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
return false;
}
#if IS_TRT_VERSION_LT(7000)
if (desc.HasAttr("approximate")) {
VLOG(3) << "approximate gelu op needs TensorRT 7.0 and after";
if (BOOST_GET_CONST(bool, desc.GetAttr("approximate"))) return false;
}
#endif
auto* block = desc.Block();
if (block == nullptr) {
......@@ -1030,6 +1033,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
"the pass.";
return false;
}
auto x_var_name = desc.Input("X")[0];
auto* x_var_desc = block->FindVar(x_var_name);
const auto x_shape = x_var_desc->GetShape();
......
......@@ -98,7 +98,17 @@ class TrtConvertGeluTest(TrtLayerAutoScanTest):
self.dynamic_shape.opt_input_shape = {}
def generate_trt_nodes_num(attrs, dynamic_shape):
if attrs[0]['approximate'] == True or self.dims == 1:
valid_version = (7, 0, 0)
compile_version = paddle_infer.get_trt_compile_version()
runtime_version = paddle_infer.get_trt_runtime_version()
self.assertTrue(compile_version == runtime_version)
            # 1-D input is not converted; it only runs as a native Paddle op
if self.dims == 1:
return 0, 3
if compile_version >= valid_version:
return 1, 2
else:
if attrs[0]['approximate'] == True:
return 0, 3
else:
return 1, 2
......