diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h
index 077ba32ba89c1b406e3fc2813bfc4c993ac4f951..f6ecf76d016759a2df05d8423635f0d560874ac2 100644
--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -295,20 +295,215 @@ class OpConverter {
     engine->ClearWeights();
   }
 
+  // rank(result) = rank(input)
+  nvinfer1::ITensor* Gather(nvinfer1::ITensor* input,
+                            const std::vector<int32_t> indices, int axis = 0) {
+    auto* indices_tensor = Add1DConstantLayer(indices, " ");
+    auto* result =
+        TRT_ENGINE_ADD_LAYER(engine_, Gather, *input, *indices_tensor, axis)
+            ->getOutput(0);
+    return result;
+  }
+
+  // Paddle allows negative indices: for an axis of length 5,
+  // the valid index range is [-5, 4].
+  nvinfer1::ITensor* FixNegIndices(nvinfer1::ITensor* input_shape,
+                                   nvinfer1::ITensor* indices) {
+    int rank = input_shape->getDimensions().nbDims;
+    std::vector<int32_t> zero = std::vector<int32_t>(rank, 0);
+    std::vector<int32_t> minus_one = std::vector<int32_t>(rank, -1);
+    nvinfer1::ITensor* zero_tensor = Add1DConstantLayer(zero);
+    nvinfer1::ITensor* minus_one_tensor = Add1DConstantLayer(minus_one);
+    // sign is -1 for negative indices, 0 otherwise
+    auto* sign = Max(Min(indices, zero_tensor), minus_one_tensor);
+    return Sub(indices, Prod(sign, input_shape));
+  }
+
+  nvinfer1::ITensor* Shape(nvinfer1::ITensor* input) {
+    return TRT_ENGINE_ADD_LAYER(engine_, Shape, *input)->getOutput(0);
+  }
+
+  // Concat does not change the rank
+  nvinfer1::ITensor* Concat(const std::vector<nvinfer1::ITensor*>& inputs,
+                            int axis = 0) {
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, inputs.data(),
+                                       inputs.size());
+    if (axis != 0) layer->setAxis(axis);
+    nvinfer1::ITensor* c = layer->getOutput(0);
+    return c;
+  }
+
+  nvinfer1::ITensor* Sum(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
+    nvinfer1::ITensor* c =
+        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
+                             nvinfer1::ElementWiseOperation::kSUM)
+            ->getOutput(0);
+    return c;
+  }
+
+  nvinfer1::ITensor* Prod(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
+    nvinfer1::ITensor* c =
+        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
+                             nvinfer1::ElementWiseOperation::kPROD)
+            ->getOutput(0);
+    return c;
+  }
+
+  nvinfer1::ITensor* Min(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
+    nvinfer1::ITensor* c =
+        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
+                             nvinfer1::ElementWiseOperation::kMIN)
+            ->getOutput(0);
+    return c;
+  }
+
+  nvinfer1::ITensor* Max(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
+    nvinfer1::ITensor* c =
+        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
+                             nvinfer1::ElementWiseOperation::kMAX)
+            ->getOutput(0);
+    return c;
+  }
+
+  nvinfer1::ITensor* Sub(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
+    nvinfer1::ITensor* c =
+        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
+                             nvinfer1::ElementWiseOperation::kSUB)
+            ->getOutput(0);
+    return c;
+  }
+
+  nvinfer1::ITensor* Div(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
+    nvinfer1::ITensor* c =
+        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
+                             nvinfer1::ElementWiseOperation::kDIV)
+            ->getOutput(0);
+    return c;
+  }
+
+  nvinfer1::ITensor* Act(nvinfer1::ITensor* a,
+                         nvinfer1::ActivationType act_type) {
+    nvinfer1::ITensor* c =
+        TRT_ENGINE_ADD_LAYER(engine_, Activation, *a, act_type)->getOutput(0);
+    return c;
+  }
+
+  // Get one element of a 1D shape tensor, as a tensor
+  nvinfer1::ITensor* GetEleTensorOfShape(nvinfer1::ITensor* shape_tensor,
+                                         int index, bool is_scalar = false) {
+    auto* tensor =
+        TRT_ENGINE_ADD_LAYER(engine_, Gather, *shape_tensor,
+                             *Add1DConstantLayer(index, " ", is_scalar), 0)
+            ->getOutput(0);
+    return tensor;
+  }
+
+  // Create and add a multi-dimensional constant float layer
+  nvinfer1::ITensor* AddConstantLayer(const float* data,
+                                      const std::vector<int>& weight_dims,
+                                      const std::string& weight_name) {
+    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
+    int data_size = std::accumulate(weight_dims.begin(), weight_dims.end(), 1,
+                                    std::multiplies<int>());
+    tmp_tensor->Resize({data_size});
+    auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
+    for (int i = 0; i < data_size; i++) {
+      tmp_data[i] = data[i];
+    }
+    engine_->SetWeights(weight_name, std::move(tmp_tensor));
+
+    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
+                                  static_cast<void*>(tmp_data),
+                                  static_cast<size_t>(data_size)};
+    nvinfer1::Dims trt_dims;
+    trt_dims.nbDims = weight_dims.size();
+    for (size_t i = 0; i < weight_dims.size(); i++)
+      trt_dims.d[i] = weight_dims[i];
+    auto const_layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims, weight.get());
+    return const_layer->getOutput(0);
+  }
+
+  // Create and add a 1D constant float layer
+  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<float>& data,
+                                        const std::string& weight_name = "",
+                                        bool scalar = false) {
+    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
+    int data_size = data.size();
+    tmp_tensor->Resize({data_size});
+    auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
+    for (int i = 0; i < data_size; i++) {
+      tmp_data[i] = data[i];
+    }
+    engine_->SetWeights(weight_name, std::move(tmp_tensor));
+
+    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
+                                  static_cast<void*>(tmp_data),
+                                  static_cast<size_t>(data_size)};
+    nvinfer1::Dims input_shape;
+    input_shape.nbDims = scalar ? 0 : 1;
+    input_shape.d[0] = data_size;
+    auto const_layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
+    return const_layer->getOutput(0);
+  }
+
+  // Create and add a 1D constant int32 layer
+  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<int>& data,
+                                        const std::string& weight_name = "",
+                                        bool scalar = false) {
+    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
+    int data_size = data.size();
+    tmp_tensor->Resize({data_size});
+    auto* tmp_data = tmp_tensor->mutable_data<int>(platform::CPUPlace());
+    for (int i = 0; i < data_size; i++) {
+      tmp_data[i] = data[i];
+    }
+    engine_->SetWeights(weight_name, std::move(tmp_tensor));
+
+    TensorRTEngine::Weight weight{nvinfer1::DataType::kINT32,
+                                  static_cast<void*>(tmp_data),
+                                  static_cast<size_t>(data_size)};
+    nvinfer1::Dims input_shape;
+    input_shape.nbDims = scalar ? 0 : 1;
+    input_shape.d[0] = data_size;
+    auto const_layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
+    return const_layer->getOutput(0);
+  }
+
+  nvinfer1::ITensor* Add1DConstantLayer(nvinfer1::Dims data,
+                                        const std::string& weight_name = "",
+                                        bool scalar = false) {
+    std::vector<int> tmp_data;
+    for (int i = 0; i < data.nbDims; i++) tmp_data.push_back(data.d[i]);
+    return Add1DConstantLayer(tmp_data, weight_name, scalar);
+  }
+
+  nvinfer1::ITensor* Add1DConstantLayer(int32_t data,
+                                        const std::string& weight_name = "",
+                                        bool scalar = false) {
+    std::vector<int> tmp_data;
+    tmp_data.push_back(data);
+    return Add1DConstantLayer(tmp_data, weight_name, scalar);
+  }
+
   void RreplenishLayerAndOutput(
       nvinfer1::ILayer* layer, const std::string& layer_type,
       const std::vector<std::string>& output_tensor_names,
       bool test_mode = false) {
     size_t num_out = output_tensor_names.size();
+    std::string layer_name = layer_type + " (Output: ";
     for (size_t i = 0; i < num_out; i++) {
       layer->getOutput(i)->setName(output_tensor_names[i].c_str());
       engine_->SetITensor(output_tensor_names[i], layer->getOutput(i));
       if (test_mode) {
         engine_->DeclareOutput(output_tensor_names[i]);
       }
+      layer_name += output_tensor_names[i];
+      if (i != num_out - 1) layer_name += ", ";
     }
-    layer->setName(
-        (layer_type + " (Output: " + output_tensor_names[0] + ")").c_str());
+    layer->setName((layer_name + ")").c_str());
   }
 
   void SetEngine(TensorRTEngine* engine) { engine_ = engine; }
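The FixNegIndices helper above normalizes Paddle-style negative indices using only elementwise layers: Min(indices, 0) followed by Max(., -1) yields -1 exactly where an index is negative and 0 elsewhere, so Sub(indices, Prod(sign, input_shape)) adds the axis length back onto negative entries only. A minimal scalar sketch of the same arithmetic, written as plain C++ rather than engine code:

#include <algorithm>
#include <cassert>

// Scalar version of the FixNegIndices arithmetic: for an axis of length
// `dim`, a Paddle index in [-dim, dim) is mapped into [0, dim). The tensor
// form performs this element-wise with Min/Max/Prod/Sub layers.
int FixNegIndex(int index, int dim) {
  int sign = std::max(std::min(index, 0), -1);  // -1 if index < 0, else 0
  return index - sign * dim;                    // shifts only negative indices
}

int main() {
  assert(FixNegIndex(-5, 5) == 0);
  assert(FixNegIndex(-1, 5) == 4);
  assert(FixNegIndex(3, 5) == 3);
  return 0;
}
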
diff --git a/paddle/fluid/inference/tensorrt/convert/split_op.cc b/paddle/fluid/inference/tensorrt/convert/split_op.cc
index 591eb06a362024d675814975dbc168652a4dc5eb..1638515ffc47f2c42c140ca28ff9ad102195511e 100644
--- a/paddle/fluid/inference/tensorrt/convert/split_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/split_op.cc
@@ -29,7 +29,6 @@ class SplitOpConverter : public OpConverter {
     // Declare inputs
     auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
     auto input_dims = input->getDimensions();
-    size_t input_num = op_desc.Input("X").size();
     size_t output_num = op_desc.Output("Out").size();
 
     // Get Attrs
@@ -41,48 +40,115 @@ class SplitOpConverter : public OpConverter {
     if (op_desc.HasAttr("num")) {
       num = BOOST_GET_CONST(int, op_desc.GetAttr("num"));
     }
-
+    nvinfer1::ITensor* shape_tensor = nullptr;
     if (engine_->with_dynamic_shape()) {
-#if IS_TRT_VERSION_GE(6000)
       axis += (axis < 0) ? input_dims.nbDims : 0;
-#endif
+      // Shape() may only be called in dynamic_shape mode
+      shape_tensor = Shape(input);
     } else {
       axis += (axis < 0) ? input_dims.nbDims : -1;
     }
-    if (num > 0) {
-      int64_t in_axis_dim = input_dims.d[axis];
-      size_t out_axis_dim = in_axis_dim / num;
-      for (int i = 0; i < num; ++i) {
-        output_lengths.push_back(out_axis_dim);
+    bool in_axis_dim_dynamic = false;
+    nvinfer1::ITensor* avg_len_tensor = nullptr;
+    // need to infer output_lengths
+    if (num > 0 && output_lengths.empty()) {
+      if (input_dims.d[axis] > 0) {
+        int64_t in_axis_dim = input_dims.d[axis];
+        size_t out_axis_dim = in_axis_dim / num;
+        for (int i = 0; i < num; ++i) {
+          output_lengths.push_back(out_axis_dim);
+        }
+      } else {
+        in_axis_dim_dynamic = true;
+        auto* num_tensor = Add1DConstantLayer(num);
+        avg_len_tensor =
+            Div(GetEleTensorOfShape(shape_tensor, axis), num_tensor);
       }
     }
 
     nvinfer1::ILayer* layer = nullptr;
+#if IS_TRT_VERSION_GE(6000)
+    if (engine_->with_dynamic_shape()) {
+      nvinfer1::Dims trt_step_dims;
+      trt_step_dims.nbDims = input->getDimensions().nbDims;
+      for (int i = 0; i < trt_step_dims.nbDims; i++) trt_step_dims.d[i] = 1;
+
+      std::vector<int32_t> gather_indices;
+      gather_indices.resize(trt_step_dims.nbDims);
+      std::iota(gather_indices.begin(), gather_indices.end(), 0);
+      gather_indices[axis] = gather_indices.size();
+      std::vector<int32_t> zeros(trt_step_dims.nbDims, 0);
+      auto* zeros_tensor = Add1DConstantLayer(zeros);
+      // input : [N,C,H,W]
+      int start_point = 0;
+      for (size_t i = 0; i < output_num; i++) {
+        nvinfer1::ITensor* this_len_tensor = nullptr;
+        nvinfer1::ITensor* start_point_tensor = nullptr;
+        if (!in_axis_dim_dynamic) {
+          this_len_tensor = Add1DConstantLayer(output_lengths[i]);
+          start_point_tensor = Add1DConstantLayer(start_point);
+          start_point += output_lengths[i];
+        } else {
+          this_len_tensor = avg_len_tensor;
+          auto* i_tensor = Add1DConstantLayer(i);
+          start_point_tensor = Prod(i_tensor, avg_len_tensor);
+        }
+
+        std::vector<nvinfer1::ITensor*> concat_inputs1 = {zeros_tensor,
+                                                          start_point_tensor};
+        std::vector<nvinfer1::ITensor*> concat_inputs2 = {shape_tensor,
+                                                          this_len_tensor};
+        auto* start_tensor = Gather(Concat(concat_inputs1), gather_indices);
+        auto* size_tensor = Gather(Concat(concat_inputs2), gather_indices);
+        layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, trt_step_dims,
+                                     trt_step_dims, trt_step_dims);
+        layer->setInput(1, *start_tensor);
+        layer->setInput(2, *size_tensor);
+
+        auto output_name = op_desc.Output("Out")[i];
+        RreplenishLayerAndOutput(layer, "split", {output_name}, test_mode);
+      }
+    } else {
+      auto chw_input_dims = input->getDimensions();
+      nvinfer1::Dims trt_start_dims;
+      trt_start_dims.nbDims = chw_input_dims.nbDims;
+      memset(trt_start_dims.d, 0, sizeof(int32_t) * chw_input_dims.nbDims);
+      nvinfer1::Dims trt_size_dims = chw_input_dims;
+      nvinfer1::Dims trt_step_dims;
+      trt_step_dims.nbDims = chw_input_dims.nbDims;
+      for (int i = 0; i < trt_step_dims.nbDims; i++) trt_step_dims.d[i] = 1;
+
+      // input : [C,H,W]
+      for (size_t i = 0; i < output_num; i++) {
+        trt_start_dims.d[axis] = std::accumulate(
+            output_lengths.begin(), output_lengths.begin() + i, 0);
+        trt_size_dims.d[axis] = output_lengths[i];
+        layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, trt_start_dims,
+                                     trt_size_dims, trt_step_dims);
+        auto output_name = op_desc.Output("Out")[i];
+        RreplenishLayerAndOutput(layer, "split", {output_name}, test_mode);
+      }
+    }
+#else
     if (engine_->with_dynamic_shape()) {
       bool with_fp16 =
           engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
       plugin::SplitPluginDynamic* plugin =
           new plugin::SplitPluginDynamic(axis, output_lengths, with_fp16);
-      layer = engine_->AddDynamicPlugin(&input, input_num, plugin);
+      layer = engine_->AddDynamicPlugin(&input, 1, plugin);
     } else {
       bool with_fp16 =
           engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
       plugin::SplitPlugin* plugin =
           new plugin::SplitPlugin(axis, output_lengths, with_fp16);
-      layer = engine_->AddPluginV2Ext(&input, input_num, plugin);
+      layer = engine_->AddPluginV2Ext(&input, 1, plugin);
     }
-
-    std::string layer_name = "split (Output: ";
+    std::vector<std::string> output_names;
     for (size_t i = 0; i < output_num; i++) {
-      auto output_name = op_desc.Output("Out")[i];
-      layer->getOutput(i)->setName(output_name.c_str());
-      engine_->SetITensor(output_name, layer->getOutput(i));
-      layer_name += output_name;
-      if (test_mode) {
-        engine_->DeclareOutput(output_name);
-      }
+      output_names.push_back(op_desc.Output("Out")[i]);
     }
-    layer->setName((layer_name + ")").c_str());
+    RreplenishLayerAndOutput(layer, "split", output_names, test_mode);
+#endif
   }
 };
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index b28fe827156c3f0cdf624b9079cac9ce2f374c9f..0260c489b5041ea15fdf0eef3db0ff2c2854d51f 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -686,7 +686,7 @@ class TensorRTEngine {
 // them, and an macro like this is more extensible when underlying TensorRT
 // library add new layer supports.
 #define TRT_ENGINE_ADD_LAYER(engine__, layer__, ...) \
-  engine__->network()->add##layer__(__VA_ARGS__);
+  engine__->network()->add##layer__(__VA_ARGS__)
 
 class TRTEngineManager {
  public:
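Two notes on the changes above. First, the dynamic-shape branch builds the Slice start and size as shape tensors rather than build-time constants: a rank-sized vector is concatenated with one extra element (the per-output offset, or its length), and a Gather with indices [0, 1, ..., rank-1], whose axis entry points one past the end, picks that extra element for the split axis while keeping the rest. A host-side sketch of this construction, using std::vector as a stand-in for the ITensor arithmetic (the helper name is illustrative):

#include <cassert>
#include <numeric>
#include <vector>

// Mimics Gather(Concat({base, value}), gather_indices) from the converter:
// append `value` to a rank-sized vector, then gather with iota indices whose
// axis entry points at the appended element.
std::vector<int> BuildSliceVector(const std::vector<int>& base, int axis,
                                  int value) {
  int rank = static_cast<int>(base.size());
  std::vector<int> concat = base;  // Concat({base, value})
  concat.push_back(value);
  std::vector<int> gather_indices(rank);
  std::iota(gather_indices.begin(), gather_indices.end(), 0);
  gather_indices[axis] = rank;  // select the appended element on the axis
  std::vector<int> out(rank);
  for (int i = 0; i < rank; i++) out[i] = concat[gather_indices[i]];
  return out;
}

int main() {
  // Input shape [N, C, H, W] = [2, 6, 4, 4], split on axis 1 into 3 + 3.
  // For the second output: start = [0, 3, 0, 0], size = [2, 3, 4, 4].
  std::vector<int> shape = {2, 6, 4, 4};
  std::vector<int> zeros(4, 0);
  assert((BuildSliceVector(zeros, 1, 3) == std::vector<int>{0, 3, 0, 0}));
  assert((BuildSliceVector(shape, 1, 3) == std::vector<int>{2, 3, 4, 4}));
  return 0;
}

Second, the engine.h change removes the trailing semicolon from TRT_ENGINE_ADD_LAYER, which is what allows the new helpers to use the macro inside larger expressions such as TRT_ENGINE_ADD_LAYER(...)->getOutput(0).
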
" - "Negative starts or ends not supported in TensorRT " - "when running in dynamic shape mode."; - return false; - } - } } } } diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_slice.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_slice.py index f82acb204f0a241671e72a37a4b9abbcb1f6b04b..003c84c4c5ab069a6ee47e09d495ca3dbb4fc74d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_slice.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_slice.py @@ -29,10 +29,7 @@ class TrtConvertSliceTest(TrtLayerAutoScanTest): attrs = [ program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - - for x in attrs[0]["decrease_axis"]: - if x < 0: - return False + out_shape = list(inputs['input_data'].shape) for x in range(len(attrs[0]["axes"])): start = 0 end = 0 @@ -48,15 +45,20 @@ class TrtConvertSliceTest(TrtLayerAutoScanTest): end = attrs[0]["ends"][x] start = max(0, start) end = max(0, end) + out_shape[attrs[0]["axes"][x]] = end - start if start >= end: return False - + for x in attrs[0]["decrease_axis"]: + if x < 0: + return False + if (out_shape[x] != 1): + return False return True def sample_program_configs(self): def generate_input1(attrs: List[Dict[str, Any]]): - return np.ones([6, 6, 64, 64]).astype(np.float32) + return np.random.random([6, 6, 64, 64]).astype(np.float32) for axes in [[0, 1], [1, 3], [2, 3]]: for starts in [[0, 1]]: diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_split.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_split.py index 38ca6963e94b2292c738f1fe28627392230f34cd..e8c283acc3b8fe8a32fe130368afde6fccb40d4e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_split.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_split.py @@ -73,13 +73,13 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest): def generate_input1(attrs: List[Dict[str, Any]], batch): if self.dims == 4: - return np.ones([batch, 3, 3, 24]).astype(np.float32) + return np.random.random([batch, 3, 3, 24]).astype(np.float32) elif self.dims == 3: - return np.ones([batch, 3, 24]).astype(np.float32) + return np.random.random([batch, 3, 24]).astype(np.float32) elif self.dims == 2: - return np.ones([batch, 24]).astype(np.float32) + return np.random.random([batch, 24]).astype(np.float32) elif self.dims == 1: - return np.ones([24]).astype(np.float32) + return np.random.random([24]).astype(np.float32) def generate_AxisTensor(attrs: List[Dict[str, Any]]): return np.ones([1]).astype(np.int32) @@ -162,25 +162,33 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest): def generate_dynamic_shape(attrs): if self.dims == 4: self.dynamic_shape.min_input_shape = { - "split_input": [1, 3, 3, 24] + "split_input": [1, 3 - 1, 3 - 1, 24 - 1] } self.dynamic_shape.max_input_shape = { - "split_input": [9, 3, 3, 24] + "split_input": [9, 3 + 1, 3 + 1, 24 + 1] } self.dynamic_shape.opt_input_shape = { "split_input": [1, 3, 3, 24] } elif self.dims == 3: - self.dynamic_shape.min_input_shape = {"split_input": [1, 3, 24]} - self.dynamic_shape.max_input_shape = {"split_input": [9, 3, 24]} + self.dynamic_shape.min_input_shape = { + "split_input": [1, 3 - 1, 24 - 1] + } + self.dynamic_shape.max_input_shape = { + "split_input": [9, 3 + 1, 24 + 1] + } self.dynamic_shape.opt_input_shape = {"split_input": [1, 3, 24]} elif self.dims == 2: - self.dynamic_shape.min_input_shape = {"split_input": [1, 24]} - self.dynamic_shape.max_input_shape = 
{"split_input": [9, 24]} + self.dynamic_shape.min_input_shape = { + "split_input": [1, 24 - 1] + } + self.dynamic_shape.max_input_shape = { + "split_input": [9, 24 + 1] + } self.dynamic_shape.opt_input_shape = {"split_input": [1, 24]} elif self.dims == 1: - self.dynamic_shape.min_input_shape = {"split_input": [24]} - self.dynamic_shape.max_input_shape = {"split_input": [24]} + self.dynamic_shape.min_input_shape = {"split_input": [24 - 1]} + self.dynamic_shape.max_input_shape = {"split_input": [24 + 1]} self.dynamic_shape.opt_input_shape = {"split_input": [24]} def clear_dynamic_shape():