From d77c4955fafd9dd01df5936628dd273b208fa3d0 Mon Sep 17 00:00:00 2001 From: zhoutianzi666 <39978853+zhoutianzi666@users.noreply.github.com> Date: Fri, 24 Jun 2022 15:38:56 +0800 Subject: [PATCH] [Inference] rewrite elementwise trt layer (#43615) * rewrite elementwise --- .../tensorrt/convert/elementwise_op.cc | 326 +++++------------- paddle/fluid/inference/tensorrt/op_teller.cc | 22 ++ 2 files changed, 116 insertions(+), 232 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc index 35d3ead009..2d342a6f70 100644 --- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc @@ -19,228 +19,115 @@ namespace paddle { namespace inference { namespace tensorrt { -static bool CheckDims(const nvinfer1::Dims& dims_x, - const nvinfer1::Dims& dims_y) { - if (dims_x.nbDims != dims_y.nbDims) { - return false; - } - for (int i = 0; i < dims_x.nbDims; i++) { - if (dims_x.d[i] != dims_y.d[i]) { - return false; - } - } - return true; -} - -class ElementwiseWeightOpConverter : public OpConverter { +class ElementwiseTensorOpConverter : public OpConverter { public: - ElementwiseWeightOpConverter() {} + ElementwiseTensorOpConverter() {} void operator()(const framework::proto::OpDesc& op, - const framework::Scope& scope, bool test_mode) override { - // Here the two nullptr looks strange, that's because the - // framework::OpDesc's constructor is strange. - nvinfer1::ILayer* layer = nullptr; + const framework::Scope& scope, + bool test_mode) override { + VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer"; framework::OpDesc op_desc(op, nullptr); - VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer"; - auto* X = engine_->GetITensor(op_desc.Input("X").front()); + nvinfer1::ITensor* Y = nullptr; auto* Y_v = scope.FindVar(op_desc.Input("Y").front()); - PADDLE_ENFORCE_NOT_NULL( - Y_v, platform::errors::NotFound("Variable %s not found in scope.", - op_desc.Input("Y").front().c_str())); - auto* Y_t = Y_v->GetMutable(); - float* weight_data = nullptr; - auto output_name = op_desc.Output("Out")[0]; - weight_data = engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t); + if (Y_v) { + // Y is weight + auto* Y_t = Y_v->GetMutable(); + float* weight_data = + engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t); + std::vector dims_y = phi::vectorize(Y_t->dims()); + TensorRTEngine::Weight y_weight{nvinfer1::DataType::kFLOAT, + static_cast(weight_data), + static_cast(Y_t->numel())}; + nvinfer1::Dims trt_dims_y; + trt_dims_y.nbDims = dims_y.size(); + for (int i = 0; i < trt_dims_y.nbDims; i++) { + trt_dims_y.d[i] = dims_y[i]; + } + Y = TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims_y, y_weight.get()) + ->getOutput(0); + } else { + Y = engine_->GetITensor(op_desc.Input("Y").front()); + } + + if (X->getDimensions().nbDims < Y->getDimensions().nbDims) { + auto* tmp = X; + X = Y; + Y = tmp; + } nvinfer1::Dims dims_x = X->getDimensions(); - std::vector dims_y = phi::vectorize(Y_t->dims()); + nvinfer1::Dims dims_y = Y->getDimensions(); + auto output_name = op_desc.Output("Out")[0]; - auto regist_eltwise_weight = [&](nvinfer1::ScaleMode scale_mode) { - nvinfer1::IShuffleLayer* expand_layer = nullptr; - nvinfer1::IShuffleLayer* squeeze_layer = nullptr; - int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0; - auto input_dim = X->getDimensions(); - // reshape - if (input_dim.nbDims < 3 + dynamic_shape_offset) { - nvinfer1::Dims expand_shape; - expand_shape.nbDims = 3 + dynamic_shape_offset; - for (int i = 0; i < expand_shape.nbDims; i++) { - if (i < input_dim.nbDims) { - expand_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i]; - } else { - expand_shape.d[i] = 1; - } - } - expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X); - expand_layer->setReshapeDimensions(expand_shape); - X = expand_layer->getOutput(0); - expand_layer->getOutput(0)->setName( - ("elementwise_reshape_out: " + output_name).c_str()); - expand_layer->setName( - ("Elewise: Shuffle: (Output: " + output_name + ")").c_str()); - } - // eltwise_ops - TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT, nullptr, - 0}; - TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, nullptr, - 0}; - TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, - 0}; - if (op_type_ == "add") { - shift_weights = TensorRTEngine::Weight( - nvinfer1::DataType::kFLOAT, static_cast(weight_data), - static_cast(Y_t->numel())); - } else if (op_type_ == "sub") { - for (int i = 0; i < Y_t->numel(); i++) { - weight_data[i] = -weight_data[i]; - } - shift_weights = TensorRTEngine::Weight( - nvinfer1::DataType::kFLOAT, static_cast(weight_data), - static_cast(Y_t->numel())); - } else if (op_type_ == "mul") { - scale_weights = TensorRTEngine::Weight( - nvinfer1::DataType::kFLOAT, static_cast(weight_data), - static_cast(Y_t->numel())); - } else if (op_type_ == "div") { - for (int i = 0; i < Y_t->numel(); i++) { - weight_data[i] = 1.f / weight_data[i]; + // axis here is relative to explicit batch + int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis")); + int real_x_rank = dims_x.nbDims; + int real_y_rank = dims_y.nbDims; + if (!engine_->with_dynamic_shape()) { + real_x_rank++; + real_y_rank++; + if (Y_v) real_y_rank--; + } + if (axis == -1) { + axis = real_x_rank - real_y_rank; + } + if (!engine_->with_dynamic_shape() && axis > 0) { + axis--; + } + + // X: - - - - - - - + // axis + // Y: - - - + // we need expand Y's rank = X's rank + int left_one_num = axis; + int right_one_num = dims_x.nbDims - axis - dims_y.nbDims; + nvinfer1::IShuffleLayer* reshape_layer; + nvinfer1::ITensor* reshape_y_tensor; + if (left_one_num > 0 || right_one_num > 0) { + if (engine_->with_dynamic_shape()) { + auto* y_shape_tensor = Shape(Y); + auto* new_y_shape_tensor = y_shape_tensor; + if (axis > 0) { + std::vector left_one(left_one_num, 1); + auto* left_one_tensor = Add1DConstantLayer(left_one); + new_y_shape_tensor = Concat(std::vector{ + left_one_tensor, new_y_shape_tensor}); } - scale_weights = TensorRTEngine::Weight( - nvinfer1::DataType::kFLOAT, static_cast(weight_data), - static_cast(Y_t->numel())); - } else if (op_type_ == "pow") { - power_weights = TensorRTEngine::Weight( - nvinfer1::DataType::kFLOAT, static_cast(weight_data), - static_cast(Y_t->numel())); - } - nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER( - engine_, ScaleNd, *X, scale_mode, shift_weights.get(), - scale_weights.get(), power_weights.get(), dynamic_shape_offset); - layer = scale_layer; - // reshape - if (input_dim.nbDims < 3 + dynamic_shape_offset) { - nvinfer1::Dims squeeze_shape; - squeeze_shape.nbDims = input_dim.nbDims; - for (int i = 0; i < squeeze_shape.nbDims; i++) { - squeeze_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i]; + if (right_one_num > 0) { + std::vector right_one(right_one_num, 1); + auto* right_one_tensor = Add1DConstantLayer(right_one); + new_y_shape_tensor = Concat(std::vector{ + new_y_shape_tensor, right_one_tensor}); } - squeeze_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0))); - squeeze_layer->setReshapeDimensions(squeeze_shape); - RreplenishLayerAndOutput(squeeze_layer, "elementwise_" + op_type_, - {output_name}, test_mode); + reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y); + reshape_layer->setInput(1, *new_y_shape_tensor); } else { - RreplenishLayerAndOutput(layer, "elementwise_" + op_type_, - {output_name}, test_mode); + nvinfer1::Dims new_y_dims; + new_y_dims.nbDims = left_one_num + dims_y.nbDims + right_one_num; + for (int i = 0; i < new_y_dims.nbDims; i++) new_y_dims.d[i] = 1; + for (int i = 0; i < dims_y.nbDims; i++) + new_y_dims.d[left_one_num + i] = dims_y.d[i]; + reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y); + reshape_layer->setReshapeDimensions(new_y_dims); } - }; - - // dynamic shape - if (engine_->with_dynamic_shape()) { - if (dims_y.size() == 1 && dims_y[0] == dims_x.d[1]) { - regist_eltwise_weight(nvinfer1::ScaleMode::kCHANNEL); - } else if (dims_y.size() == 1 && dims_y[0] == 1) { - regist_eltwise_weight(nvinfer1::ScaleMode::kUNIFORM); - } else if (dims_y.size() == static_cast(dims_x.nbDims)) { - regist_eltwise_weight(nvinfer1::ScaleMode::kELEMENTWISE); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "The size of input_y's dims is %d, but TensorRT dynamic shape " - "only support size = 1 or size = input_x.size() for Elementwise " - "op!", - dims_y.size())); - } - return; - } - - // static shape with dynamic batch - std::vector no_batch_dims; - int start_index = 0; - for (; start_index < dims_x.nbDims; start_index++) { - no_batch_dims.push_back(dims_x.d[start_index]); - } - if (dims_y.size() == 1 && dims_y[0] == no_batch_dims[0]) { - regist_eltwise_weight(nvinfer1::ScaleMode::kCHANNEL); - } else if (dims_y.size() == 1 && dims_y[0] == 1) { - regist_eltwise_weight(nvinfer1::ScaleMode::kUNIFORM); - } else if (dims_y.size() == no_batch_dims.size() + 1) { - regist_eltwise_weight(nvinfer1::ScaleMode::kELEMENTWISE); + reshape_y_tensor = reshape_layer->getOutput(0); } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "The size of input_y's dims is %d, but TensorRT dynamic shape " - "only support size = 1 or size = input_x.size() for Elementwise " - "op!", - dims_y.size())); + // In fact , we can remove this `else`, but -> rt_resnet50_test CI in trt + // 6015 faling, how ridiculous! + reshape_y_tensor = Y; } - } - protected: - std::string op_type_; -}; - -class ElementwiseTensorOpConverter : public OpConverter { - public: - ElementwiseTensorOpConverter() {} - void operator()(const framework::proto::OpDesc& op, - const framework::Scope& scope, bool test_mode) override { auto op_pair = ops.find(op_type_); - PADDLE_ENFORCE_NE(op_pair, ops.end(), + PADDLE_ENFORCE_NE(op_pair, + ops.end(), platform::errors::InvalidArgument( "Elementwise op's type(%s) is not supported. Please " "check if the op_type is correct.", op_type_)); - // Here the two nullptr looks strange, that's because the - // framework::OpDesc's constructor is strange. - framework::OpDesc op_desc(op, nullptr); - nvinfer1::ILayer* layer = nullptr; - - auto* X = engine_->GetITensor(op_desc.Input("X").front()); - auto* Y = engine_->GetITensor(op_desc.Input("Y").front()); - std::vector itensors; - itensors.push_back(X); - itensors.push_back(Y); - nvinfer1::Dims dims_x = X->getDimensions(); - nvinfer1::Dims dims_y = Y->getDimensions(); - - int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis")); - auto output_name = op_desc.Output("Out")[0]; - - auto common_func = [&](nvinfer1::ILayer* layer) { - RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode); - }; - if (dims_x.nbDims == dims_y.nbDims) { - // The two input tensor should have the same dims - VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer"; - nvinfer1::IElementWiseLayer* elet_layer = - TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *X, *Y, op_pair->second); - - layer = elet_layer; - } else { - VLOG(3) << "Convert a fluid elementwise op to TensorRT " - "ElementWisePluginLayer"; - if (engine_->with_dynamic_shape()) { -#if IS_TRT_VERSION_GE(6000) - plugin::ElementwisePluginDynamic* plugin = - new plugin::ElementwisePluginDynamic(op_type_, axis); - layer = engine_->AddDynamicPlugin(itensors.data(), 2, plugin); -#else - PADDLE_THROW(platform::errors::Fatal( - "You are running the TRT Dynamic Shape mode, need to confirm that " - "your TRT version is no less than 6.0")); -#endif - } else { - plugin::ElementWisePlugin* plugin = - new plugin::ElementWisePlugin(op_type_, dims_x, dims_y, axis); - - std::vector inputs{X, Y}; - auto* plugin_layer = engine_->AddPlugin( - inputs.data(), inputs.size(), - reinterpret_cast(plugin)); - layer = plugin_layer; - } - } - common_func(layer); + auto* layer = TRT_ENGINE_ADD_LAYER( + engine_, ElementWise, *X, *reshape_y_tensor, op_pair->second); + RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode); } protected: @@ -260,31 +147,6 @@ const std::unordered_map {"max", nvinfer1::ElementWiseOperation::kMAX}, }; -class ElementwiseWeightAddOpConverter : public ElementwiseWeightOpConverter { - public: - ElementwiseWeightAddOpConverter() { op_type_ = "add"; } -}; - -class ElementwiseWeightMulOpConverter : public ElementwiseWeightOpConverter { - public: - ElementwiseWeightMulOpConverter() { op_type_ = "mul"; } -}; - -class ElementwiseWeightSubOpConverter : public ElementwiseWeightOpConverter { - public: - ElementwiseWeightSubOpConverter() { op_type_ = "sub"; } -}; - -class ElementwiseWeightDivOpConverter : public ElementwiseWeightOpConverter { - public: - ElementwiseWeightDivOpConverter() { op_type_ = "div"; } -}; - -class ElementwiseWeightPowOpConverter : public ElementwiseWeightOpConverter { - public: - ElementwiseWeightPowOpConverter() { op_type_ = "pow"; } -}; - class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter { public: ElementwiseTensorAddOpConverter() { op_type_ = "add"; } @@ -325,15 +187,15 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter { } // namespace paddle REGISTER_TRT_OP_CONVERTER(elementwise_add_weight, - ElementwiseWeightAddOpConverter); + ElementwiseTensorAddOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_mul_weight, - ElementwiseWeightMulOpConverter); + ElementwiseTensorMulOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_sub_weight, - ElementwiseWeightSubOpConverter); + ElementwiseTensorSubOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_div_weight, - ElementwiseWeightDivOpConverter); + ElementwiseTensorDivOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_pow_weight, - ElementwiseWeightPowOpConverter); + ElementwiseTensorPowOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_add_tensor, ElementwiseTensorAddOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 66dcac02b4..7465c24e31 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1256,6 +1256,28 @@ bool OpTeller::Tell(const framework::ir::Node* node, } } } + // not support following four inputs for slice in paddle-trt + auto slice_inputs = desc.Inputs(); // its size == 5 + if (slice_inputs.find("StartsTensor") != slice_inputs.end()) { + if (desc.Input("StartsTensor").size()) { + return false; + } + } + if (slice_inputs.find("EndsTensor") != slice_inputs.end()) { + if (desc.Input("EndsTensor").size()) { + return false; + } + } + if (slice_inputs.find("StartsTensorList") != slice_inputs.end()) { + if (desc.Input("StartsTensorList").size()) { + return false; + } + } + if (slice_inputs.find("EndsTensorList") != slice_inputs.end()) { + if (desc.Input("EndsTensorList").size()) { + return false; + } + } } if (op_type == "elementwise_add" || op_type == "elementwise_mul" || -- GitLab