diff --git a/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc b/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
index b48c8c6e70a939e6494c766cc8eaebd503e69855..354db8acf87a7322bf26ceaa5a346d3a5cc1b42f 100644
--- a/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
@@ -153,6 +153,43 @@ QuantDequantFusePass::QuantDequantFusePass() {
       .AddAttr("data_format")
       .IsStringIn({"NCHW", "NHWC", "AnyLayout"})
       .End();
+  AddOpCompat(OpCompat("depthwise_conv2d"))
+      .AddInput("Input")
+      .IsTensor()
+      .End()
+      .AddInput("Filter")
+      .IsTensor()
+      .End()
+      .AddInput("Bias")
+      .IsTensor()
+      .IsOptional()
+      .End()
+      .AddInput("ResidualData")
+      .IsTensor()
+      .IsOptional()
+      .End()
+      .AddOutput("Output")
+      .IsTensor()
+      .End()
+      .AddAttr("strides")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("paddings")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("padding_algorithm")
+      .IsOptional()
+      .IsStringIn({"EXPLICIT", "SAME", "VALID"})
+      .End()
+      .AddAttr("groups")
+      .IsNumGE(1)
+      .End()
+      .AddAttr("dilations")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("data_format")
+      .IsStringIn({"NCHW", "NHWC", "AnyLayout"})
+      .End();
   AddOpCompat(OpCompat("mul"))
       .AddInput("X")
       .IsTensor()
       .End()
diff --git a/paddle/fluid/inference/tensorrt/convert/fc_op.cc b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
index 74bb854e55f8231042fb014817a81dfa647c7e7b..ef50f3db42c6f54b146e01cb8124d344746af212 100644
--- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -33,6 +33,53 @@ namespace tensorrt {
  */
 class FcOpConverter : public OpConverter {
  public:
+  nvinfer1::ILayer* reshape_before_fc(nvinfer1::ITensor* before_fc,
+                                      nvinfer1::Dims x_dim,
+                                      int x_num_col_dims) {
+    // add shuffle before fc
+    nvinfer1::Dims reshape_before_fc_dim;
+    reshape_before_fc_dim.nbDims = x_num_col_dims + 3;
+    // padding shape "* x q x 1 x 1"
+    for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
+      reshape_before_fc_dim.d[i] = 1;
+    }
+    for (int i = 0; i < x_dim.nbDims; i++) {
+      if (i < x_num_col_dims) {
+        reshape_before_fc_dim.d[i] = 0;
+      } else {
+        if (x_dim.d[i] < 0) {
+          reshape_before_fc_dim.d[x_num_col_dims] = -1;
+          break;
+        }
+        reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
+      }
+    }
+    auto* reshape_before_fc_layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *before_fc);
+    reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
+    return reshape_before_fc_layer;
+  }
+
+  nvinfer1::ILayer* reshape_after_fc(nvinfer1::ITensor* after_fc,
+                                     nvinfer1::Dims x_dim, int x_num_col_dims) {
+    // add shuffle after fc
+    nvinfer1::Dims reshape_after_fc_dim;
+    if (engine_->use_oss() && engine_->with_ernie() && x_dim.nbDims == 4 &&
+        x_dim.d[2] == 1 && x_dim.d[3] == 1 && x_num_col_dims == 1) {
+      // If use tensorrt'oss, the x_dim and x_num_col_dims need change
+      reshape_after_fc_dim.nbDims = 4;
+    } else {
+      reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
+    }
+    for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
+      reshape_after_fc_dim.d[i] = 0;
+    }
+    auto* reshape_after_fc_layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *after_fc);
+    reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
+    return reshape_after_fc_layer;
+  }
+
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
     VLOG(3) << "convert a fluid fc op to tensorrt fc layer without bias";
@@ -92,10 +139,8 @@ class FcOpConverter : public OpConverter {
                           "it's %d-dimensional.",
                           Y_t->dims().size()));  // a matrix
   size_t n_output = Y_t->dims()[1];
-
   int m = Y_t->dims()[0];
   int n = Y_t->dims()[1];
-
   auto tranpose_weight = [](const float* src, float* dst, int m, int n) {
     for (int i = 0; i < m; i++) {
       for (int j = 0; j < n; j++) {
@@ -119,47 +164,35 @@ class FcOpConverter : public OpConverter {
       auto* fc_layer_int8 =
           TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
                                nv_ksize, weight.get(), bias.get());
-      engine_->SetTensorDynamicRange(fc_layer_int8->getOutput(0), out_scale);
+      auto* fc_after_reshape_int8 = reshape_after_fc(
+          fc_layer_int8->getOutput(0), x_dim, x_num_col_dims);
+      engine_->SetTensorDynamicRange(fc_after_reshape_int8->getOutput(0),
+                                     out_scale);
       if (activation_type == "relu") {
         nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
-            engine_, Activation, *(fc_layer_int8->getOutput(0)),
+            engine_, Activation, *(fc_after_reshape_int8->getOutput(0)),
             nvinfer1::ActivationType::kRELU);
         RreplenishLayerAndOutput(relu_layer_int8, "relu_after_fc_shuffle",
                                  {output_name}, test_mode);
       } else {
-        RreplenishLayerAndOutput(fc_layer_int8, "shuffle_after_fc",
+        RreplenishLayerAndOutput(fc_after_reshape_int8, "shuffle_after_fc",
                                  {output_name}, test_mode);
       }
     } else {
       // add fc layer
-      auto* fc_layer_before =
+      auto* fc_layer_float =
           TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *inputs, n_output,
                                weight.get(), bias.get());
-      fc_layer_before->setName(
-          ("fc_layer_before(Output: " + output_name + ")").c_str());
-      // add shuffle after fc
-      nvinfer1::Dims reshape_after_fc_dim;
-      if (engine_->use_oss() && engine_->with_ernie() && x_dim.nbDims == 4 &&
-          x_dim.d[2] == 1 && x_dim.d[3] == 1 && x_num_col_dims == 1) {
-        // If use tensorrt'oss, the x_dim and x_num_col_dims need change
-        reshape_after_fc_dim.nbDims = 4;
-      } else {
-        reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
-      }
-      for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
-        reshape_after_fc_dim.d[i] = 0;
-      }
-      auto* fc_layer_float = TRT_ENGINE_ADD_LAYER(
-          engine_, Shuffle, *fc_layer_before->getOutput(0));
-      fc_layer_float->setReshapeDimensions(reshape_after_fc_dim);
+      auto* fc_after_reshape_float = reshape_after_fc(
+          fc_layer_float->getOutput(0), x_dim, x_num_col_dims);
       if (activation_type == "relu") {
         nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
-            engine_, Activation, *(fc_layer_float->getOutput(0)),
+            engine_, Activation, *(fc_after_reshape_float->getOutput(0)),
             nvinfer1::ActivationType::kRELU);
         RreplenishLayerAndOutput(relu_layer_float, "relu_after_fc_shuffle",
                                  {output_name}, test_mode);
       } else {
-        RreplenishLayerAndOutput(fc_layer_float, "shuffle_after_fc",
+        RreplenishLayerAndOutput(fc_after_reshape_float, "shuffle_after_fc",
                                  {output_name}, test_mode);
       }
     }
@@ -169,12 +202,10 @@ class FcOpConverter : public OpConverter {
   weight_data_tmp.reserve(Y_t->numel());
   memcpy(weight_data_tmp.data(), weight_data, Y_t->numel() * sizeof(float));
   tranpose_weight(weight_data_tmp.data(), weight_data, m, n);
-
   TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
                                 static_cast<void*>(weight_data),
                                 static_cast<size_t>(Y_t->numel())};
   weight.dims.assign({n, m});
-
   float* bias_data = nullptr;
   int bias_num = 0;
   if (with_bias) {
@@ -204,28 +235,7 @@ class FcOpConverter : public OpConverter {
             "converter expects x_dim.nbDims > x_num_col_dims, but "
             "x_dim.nbDims : %d, x_num_col_dims : %d.",
             x_dim.nbDims, x_num_col_dims));
-    // add shuffle before fc
-    nvinfer1::Dims reshape_before_fc_dim;
-    reshape_before_fc_dim.nbDims = x_num_col_dims + 3;
-    // padding shape "* x q x 1 x 1"
-    for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
-      reshape_before_fc_dim.d[i] = 1;
-    }
-    for (int i = 0; i < x_dim.nbDims; i++) {
-      if (i < x_num_col_dims) {
-        reshape_before_fc_dim.d[i] = 0;
-      } else {
-        if (x_dim.d[i] < 0) {
-          reshape_before_fc_dim.d[x_num_col_dims] = -1;
-          break;
-        }
-        reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
-      }
-    }
-    auto* reshape_before_fc_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
-    reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
-    reshape_before_fc_layer->setName(
-        ("shuffle_before_fc(Output: " + output_name + ")").c_str());
+    auto* reshape_before_fc_layer = reshape_before_fc(X, x_dim, x_num_col_dims);
     auto* reshape_itensor = reshape_before_fc_layer->getOutput(0);
     if (enable_int8) {
       engine_->SetTensorDynamicRange(reshape_itensor, in_scale);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index bfe3dfc85eecdd966fbfa18d128a66373cd75dd7..72f20790f35242124f467cc18256b6b63f687d79 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -698,15 +698,22 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
         return false;
       }
     }
+
    if (op_type == "reshape" || op_type == "reshape2") {
      if (!desc.HasAttr("shape")) {
        return false;
      }
      // Paddle-TRT does not support the input tensors: Shape and ShapeTensor
      auto reshape_inputs = desc.Inputs();
-      if (reshape_inputs.find("Shape") != reshape_inputs.end() ||
-          reshape_inputs.find("ShapeTensor") != reshape_inputs.end()) {
-        return false;
+      if (reshape_inputs.find("Shape") != reshape_inputs.end()) {
+        if (desc.Input("Shape").size() >= 1) {
+          return false;
+        }
+      }
+      if (reshape_inputs.find("ShapeTensor") != reshape_inputs.end()) {
+        if (desc.Input("ShapeTensor").size() >= 1) {
+          return false;
+        }
      }
      std::vector<int> shape =
          BOOST_GET_CONST(std::vector<int>, desc.GetAttr("shape"));