未验证 提交 db864f0b 编写于 作者: Z zhoutianzi666 提交者: GitHub

commit (#44534)

上级 1c0120e2
...@@ -333,23 +333,91 @@ class FcOpConverter : public OpConverter { ...@@ -333,23 +333,91 @@ class FcOpConverter : public OpConverter {
if (!engine_->with_dynamic_shape()) { if (!engine_->with_dynamic_shape()) {
x_num_col_dims--; x_num_col_dims--;
} }
PADDLE_ENFORCE_GT( // If use tensorrt'oss, the x_dim and x_num_col_dims need change, and can
x_dim.nbDims, // not add Shuffle layer in ernie's multihead.
x_num_col_dims, if (x_dim.nbDims == 4 && x_num_col_dims == 1) {
platform::errors::InvalidArgument( if (enable_int8 || support_int8) {
"Params and input dims mismatch. Paddle-TRT FC " // add conv1x1 layer
"converter expects x_dim.nbDims > x_num_col_dims, but " nvinfer1::DimsHW nv_ksize(1, 1);
"x_dim.nbDims : %d, x_num_col_dims : %d.", auto* fc_layer_int8 = TRT_ENGINE_ADD_LAYER(engine_,
x_dim.nbDims, Convolution,
x_num_col_dims)); *X,
// need reshape input before and after fc n_output,
auto* reshape_before_fc_layer = nv_ksize,
reshape_before_fc(X, x_dim, x_num_col_dims, output_name); weight.get(),
auto* reshape_itensor = reshape_before_fc_layer->getOutput(0); bias.get());
if (enable_int8 || support_int8) { if (activation_type == "relu") {
engine_->SetTensorDynamicRange(reshape_itensor, in_scale); fc_layer_int8->setName(
("ernie_fc_op_int8: Convolution (Output: " + output_name + ")")
.c_str());
PADDLE_ENFORCE_EQ(
op_desc.HasAttr("out_threshold"),
true,
platform::errors::InvalidArgument(
"must have out threshold in fc layers in int8 mode"));
float out_scale = 0;
if (enable_int8) {
out_scale =
PADDLE_GET_CONST(float, op_desc.GetAttr("out_threshold"));
} else {
out_scale = PADDLE_GET_CONST(float, op_desc.GetAttr("Out"));
}
engine_->SetTensorDynamicRange(fc_layer_int8->getOutput(0),
out_scale);
nvinfer1::IActivationLayer* relu_layer_int8 =
TRT_ENGINE_ADD_LAYER(engine_,
Activation,
*(fc_layer_int8->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_int8,
"relu_after_ernie_fc_int8",
{output_name},
test_mode);
} else {
RreplenishLayerAndOutput(fc_layer_int8,
"ernie_fc_op_int8: Convolution",
{output_name},
test_mode);
}
} else {
// add fc layer
auto* fc_layer_float = TRT_ENGINE_ADD_LAYER(
engine_, FullyConnected, *X, n_output, weight.get(), bias.get());
if (activation_type == "relu") {
fc_layer_float->setName(
("ernie_fc_op_float: (Output: " + output_name + ")").c_str());
nvinfer1::IActivationLayer* relu_layer_float =
TRT_ENGINE_ADD_LAYER(engine_,
Activation,
*(fc_layer_float->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_float,
"relu_after_ernie_fc_float",
{output_name},
test_mode);
} else {
RreplenishLayerAndOutput(
fc_layer_float, "ernie_fc_op_float", {output_name}, test_mode);
}
}
} else { // need reshape input before and after fc
PADDLE_ENFORCE_GT(
x_dim.nbDims,
x_num_col_dims,
platform::errors::InvalidArgument(
"Params and input dims mismatch. Paddle-TRT FC "
"converter expects x_dim.nbDims > x_num_col_dims, but "
"x_dim.nbDims : %d, x_num_col_dims : %d.",
x_dim.nbDims,
x_num_col_dims));
auto* reshape_before_fc_layer =
reshape_before_fc(X, x_dim, x_num_col_dims, output_name);
auto* reshape_itensor = reshape_before_fc_layer->getOutput(0);
if (enable_int8 || support_int8) {
engine_->SetTensorDynamicRange(reshape_itensor, in_scale);
}
regist_fc(reshape_itensor, n_output, weight, bias);
} }
regist_fc(reshape_itensor, n_output, weight, bias);
} }
}; };
......
...@@ -249,9 +249,9 @@ class TrtConvertFcTest2(TrtLayerAutoScanTest): ...@@ -249,9 +249,9 @@ class TrtConvertFcTest2(TrtLayerAutoScanTest):
# this is the special case when x_dim.nbDims == 4 && x_num_col_dims == 1 # this is the special case when x_dim.nbDims == 4 && x_num_col_dims == 1
class TrtConvertFcTest3(TrtLayerAutoScanTest): class TrtConvertFcTest3(TrtLayerAutoScanTest):
# this case will invoke a bug in fc_op.cc, so return False
def is_program_valid(self, program_config: ProgramConfig) -> bool: def is_program_valid(self, program_config: ProgramConfig) -> bool:
return True return False
def sample_program_configs(self): def sample_program_configs(self):
self.trt_param.workspace_size = 1073741824 self.trt_param.workspace_size = 1073741824
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册