diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index e1fe856e3c03e892f1c8d2c11063136da7b33ab6..97ca7c37c7f0e7525a258c39c8b1147e22f10250 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -77,6 +77,15 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("optim_input_shape",
                 new std::map<std::string, std::vector<int>>(
                     argument->optim_input_shape()));
+      // Shape tensor values are not explicitly set by the user for now;
+      // they are collected at runtime through the CollectShapeRangeInfo API.
+      pass->Set("max_shape_tensor",
+                new std::map<std::string, std::vector<int>>());
+      pass->Set("min_shape_tensor",
+                new std::map<std::string, std::vector<int>>());
+      pass->Set("optim_shape_tensor",
+                new std::map<std::string, std::vector<int>>());
+
       // tuned trt dynamic_shape
       pass->Set("trt_tuned_dynamic_shape",
                 new bool(argument->tensorrt_tuned_dynamic_shape()));
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index 05d66d7fe2793d35513333f67fdc25b56557d439..9c63c95fd91ba68be578d98bffc4cb60362fc40e 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -317,6 +317,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   auto opt_input_shape =
       Get<std::map<std::string, std::vector<int>>>("optim_input_shape");

+  auto min_shape_tensor =
+      Get<std::map<std::string, std::vector<int>>>("min_shape_tensor");
+  auto max_shape_tensor =
+      Get<std::map<std::string, std::vector<int>>>("max_shape_tensor");
+  auto opt_shape_tensor =
+      Get<std::map<std::string, std::vector<int>>>("optim_shape_tensor");
+
   auto allow_build_at_runtime = Get<bool>("trt_allow_build_at_runtime");
   auto shape_range_info_path = Get<std::string>("trt_shape_range_info_path");
   auto trt_tuned_dynamic_shape = Get<bool>("trt_tuned_dynamic_shape");
@@ -326,7 +333,10 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
     inference::DeserializeShapeRangeInfo(shape_range_info_path,
                                          &min_input_shape,
                                          &max_input_shape,
-                                         &opt_input_shape);
+                                         &opt_input_shape,
+                                         &min_shape_tensor,
+                                         &max_shape_tensor,
+                                         &opt_shape_tensor);
   }

   // The following procedure is used to rename all the intermediate
@@ -511,6 +521,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
           min_input_shape,
           max_input_shape,
           opt_input_shape,
+          min_shape_tensor,
+          max_shape_tensor,
+          opt_shape_tensor,
           disable_trt_plugin_fp16,
           static_cast<phi::DataType>(Get<int>("model_precision")));
   trt_engine->SetUseOSS(Get<bool>("use_varseqlen"));
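
When trt_tuned_dynamic_shape is on, the pass fills all six maps from the same serialized range file, so shape-tensor value ranges travel through exactly the same channel as ordinary input-shape ranges. A minimal standalone sketch of that read-back, assuming an illustrative file name:

  std::map<std::string, std::vector<int>> min_shape, max_shape, opt_shape;
  std::map<std::string, std::vector<int>> min_value, max_value, opt_value;
  // Fills shape ranges for ordinary inputs and value ranges for shape
  // tensors from one file ("shape_range_info.pbtxt" is illustrative).
  inference::DeserializeShapeRangeInfo("shape_range_info.pbtxt",
                                       &min_shape, &max_shape, &opt_shape,
                                       &min_value, &max_value, &opt_value);
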
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index a298cd1817e4a708a9ecd2413e83e695e116685b..42126b5048e685cac7ae1106a925faa0b55892c6 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1747,10 +1747,39 @@ void AnalysisPredictor::CollectShapeRangeInfo() {
     if (!var->IsType<framework::LoDTensor>()) {
       continue;
     }
-    framework::DDim dim = var->Get<framework::LoDTensor>().dims();
+    auto tensor = var->Get<framework::LoDTensor>();
+    framework::DDim dim = tensor.dims();
     std::vector<int32_t> shape(dim.size());
     for (size_t i = 0; i < shape.size(); ++i) shape[i] = dim[i];
     shape_info_[name].emplace_back(shape);
+
+    // We need to collect the value range of shape tensors for Paddle-TRT's
+    // use. Note that this heuristic assumes every shape tensor in the
+    // model has at most 7 elements, so it can occasionally misclassify a
+    // small integer tensor as a shape tensor; collecting extra values for
+    // such tensors is harmless.
+    auto is_shape_tensor = tensor.numel() <= 7 && tensor.numel() >= 1;
+    if (tensor.dtype() == paddle::experimental::DataType::INT32 &&
+        is_shape_tensor) {
+      std::vector<int> int32_host(tensor.numel());
+      if (tensor.place() == platform::CPUPlace()) {
+        paddle::memory::Copy(platform::CPUPlace(),
+                             int32_host.data(),
+                             platform::CPUPlace(),
+                             tensor.data<int>(),
+                             tensor.numel() * sizeof(int));
+      } else if (tensor.place() == platform::CUDAPlace()) {
+#if defined(PADDLE_WITH_CUDA)
+        paddle::memory::Copy(platform::CPUPlace(),
+                             int32_host.data(),
+                             platform::CUDAPlace(),
+                             tensor.data<int>(),
+                             tensor.numel() * sizeof(int),
+                             nullptr);
+#endif
+      }
+      shape_tensor_value_[name].emplace_back(int32_host);
+    }
   }
 }

@@ -1758,43 +1787,61 @@ void AnalysisPredictor::StatisticShapeRangeInfo() {
   std::map<std::string, std::vector<int32_t>> min_shapes;
   std::map<std::string, std::vector<int32_t>> max_shapes;
   std::map<std::string, std::vector<int32_t>> opt_shapes;
-  for (auto it : shape_info_) {
-    auto name = it.first;
-    auto shapes = it.second;
-
-    std::vector<int32_t> min_shape(shapes[0].begin(), shapes[0].end());
-    std::vector<int32_t> max_shape(shapes[0].begin(), shapes[0].end());
-    std::vector<int32_t> opt_shape(shapes[0].begin(), shapes[0].end());
-
-    auto ShapeMaxFreq = [](const std::map<int32_t, int32_t> &m) -> int32_t {
-      std::vector<std::pair<int32_t, int32_t>> counter;
-      for (auto &it : m) counter.push_back(it);
-      std::sort(
-          counter.begin(),
-          counter.end(),
-          [](std::pair<int32_t, int32_t> &a, std::pair<int32_t, int32_t> &b) {
-            return a.second > b.second;
-          });
-      return counter[0].first;
-    };
-
-    for (size_t d = 0; d < shapes[0].size(); ++d) {
-      std::map<int32_t, int32_t> counter;
-      for (size_t i = 0; i < shapes.size(); ++i) {
-        counter[shapes[i][d]] += 1;
-        if (shapes[i][d] < min_shape[d]) min_shape[d] = shapes[i][d];
-        if (shapes[i][d] > max_shape[d]) max_shape[d] = shapes[i][d];
-      }
-      opt_shape[d] = ShapeMaxFreq(counter);
-    }
-
-    min_shapes[name] = min_shape;
-    max_shapes[name] = max_shape;
-    opt_shapes[name] = opt_shape;
-  }
+  std::map<std::string, std::vector<int32_t>> min_values;
+  std::map<std::string, std::vector<int32_t>> max_values;
+  std::map<std::string, std::vector<int32_t>> opt_values;
+
+  auto extract_min_max_opt =
+      [](std::map<std::string, std::vector<int32_t>> &min_data,
+         decltype(min_data) max_data,
+         decltype(min_data) opt_data,
+         decltype(shape_info_) shape_data) {
+        for (auto it : shape_data) {
+          auto name = it.first;
+          auto shapes = it.second;
+
+          std::vector<int32_t> min_shape(shapes[0].begin(), shapes[0].end());
+          std::vector<int32_t> max_shape(shapes[0].begin(), shapes[0].end());
+          std::vector<int32_t> opt_shape(shapes[0].begin(), shapes[0].end());
+
+          auto ShapeMaxFreq =
+              [](const std::map<int32_t, int32_t> &m) -> int32_t {
+            std::vector<std::pair<int32_t, int32_t>> counter;
+            for (auto &it : m) counter.push_back(it);
+            std::sort(counter.begin(),
+                      counter.end(),
+                      [](std::pair<int32_t, int32_t> &a,
+                         std::pair<int32_t, int32_t> &b) {
+                        return a.second > b.second;
+                      });
+            return counter[0].first;
+          };
+
+          for (size_t d = 0; d < shapes[0].size(); ++d) {
+            std::map<int32_t, int32_t> counter;
+            for (size_t i = 0; i < shapes.size(); ++i) {
+              counter[shapes[i][d]] += 1;
+              if (shapes[i][d] < min_shape[d]) min_shape[d] = shapes[i][d];
+              if (shapes[i][d] > max_shape[d]) max_shape[d] = shapes[i][d];
+            }
+            opt_shape[d] = ShapeMaxFreq(counter);
+          }

-  inference::SerializeShapeRangeInfo(
-      config_.shape_range_info_path(), min_shapes, max_shapes, opt_shapes);
+          min_data[name] = min_shape;
+          max_data[name] = max_shape;
+          opt_data[name] = opt_shape;
+        }
+      };
+  extract_min_max_opt(min_shapes, max_shapes, opt_shapes, shape_info_);
+  extract_min_max_opt(min_values, max_values, opt_values, shape_tensor_value_);
+
+  inference::SerializeShapeRangeInfo(config_.shape_range_info_path(),
+                                     min_shapes,
+                                     max_shapes,
+                                     opt_shapes,
+                                     min_values,
+                                     max_values,
+                                     opt_values);
 }

 bool AnalysisPredictor::LoadProgramDesc() {
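
For each dimension (or, for a shape tensor, each element) the statistics are: the minimum over all observations, the maximum, and the most frequent value as opt. A standalone illustration of the ShapeMaxFreq selection; MostFrequent is a hypothetical helper, not part of this patch:

  #include <cstdint>
  #include <map>
  #include <vector>

  // Most frequently observed value, mirroring the ShapeMaxFreq lambda above.
  int32_t MostFrequent(const std::vector<int32_t> &observed) {
    std::map<int32_t, int32_t> counter;
    for (int32_t v : observed) counter[v] += 1;
    int32_t best = observed.front();
    int32_t best_count = 0;
    for (const auto &kv : counter) {
      if (kv.second > best_count) {
        best = kv.first;
        best_count = kv.second;
      }
    }
    return best;
  }

  // E.g. observations {8, 8, 4, 8} for one element yield min 4, max 8, opt 8.
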
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 7b561275cbc52a0c424252c00099f217645c0188..ff34bac545d80ecb7f65f7b4b17a700113f8c2b5 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -514,6 +514,7 @@ class AnalysisPredictor : public PaddlePredictor {
   bool status_is_cloned_{false};

   std::map<std::string, std::vector<std::vector<int32_t>>> shape_info_;
+  std::map<std::string, std::vector<std::vector<int32_t>>> shape_tensor_value_;
   static int clone_num_;

   bool private_context_{false};
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 06886aa782fa7cf1087f982c73b4f8a2d58d1140..c6c0d0479c9e5fc2c9139457fee43d0206c2af0d 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -275,6 +275,36 @@ void TensorRTEngine::FreezeNetwork() {
             nvinfer1::OptProfileSelector::kOPT,
             Vec2TRT_Dims(optim_input_shape_[input.first], input.first, true));
       }
+
+      for (int input_id = 0; input_id < network()->getNbInputs(); input_id++) {
+        auto input_name = network()->getInput(input_id)->getName();
+        if (!itensor_map_.count(input_name)) continue;
+        if (!GetITensor(input_name)->isShapeTensor()) continue;
+        PADDLE_ENFORCE_EQ(min_shape_tensor_.count(input_name) &&
+                              max_shape_tensor_.count(input_name) &&
+                              optim_shape_tensor_.count(input_name),
+                          true,
+                          platform::errors::InvalidArgument(
+                              "Failed to find min/max/optim shape values for "
+                              "TRT network's shape tensor input named %s.",
+                              input_name));
+        const auto &min_vec = min_shape_tensor_.at(input_name);
+        const auto &max_vec = max_shape_tensor_.at(input_name);
+        const auto &opt_vec = optim_shape_tensor_.at(input_name);
+        optim_profiles_[i]->setShapeValues(input_name,
+                                           nvinfer1::OptProfileSelector::kMIN,
+                                           min_vec.data(),
+                                           min_vec.size());
+        optim_profiles_[i]->setShapeValues(input_name,
+                                           nvinfer1::OptProfileSelector::kMAX,
+                                           max_vec.data(),
+                                           max_vec.size());
+        optim_profiles_[i]->setShapeValues(input_name,
+                                           nvinfer1::OptProfileSelector::kOPT,
+                                           opt_vec.data(),
+                                           opt_vec.size());
+      }
+
       infer_builder_config_->addOptimizationProfile(optim_profiles_[i]);
     }
     if (WithFp16() && disable_trt_plugin_fp16()) {
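
For reference, the new loop is a direct use of TensorRT's per-profile shape-values API. A stripped-down sketch of the same calls outside Paddle, assuming a local builder/config pair and a 3-element shape-tensor input named "shape" (all names and values illustrative):

  nvinfer1::IOptimizationProfile *profile =
      builder->createOptimizationProfile();
  const int32_t min_v[3] = {1, 8, 4};
  const int32_t max_v[3] = {18, 8, 4};
  const int32_t opt_v[3] = {18, 8, 4};
  // For a shape tensor, the profile constrains element values, not dims.
  profile->setShapeValues("shape", nvinfer1::OptProfileSelector::kMIN, min_v, 3);
  profile->setShapeValues("shape", nvinfer1::OptProfileSelector::kMAX, max_v, 3);
  profile->setShapeValues("shape", nvinfer1::OptProfileSelector::kOPT, opt_v, 3);
  config->addOptimizationProfile(profile);
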
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 138f7faadf3adc77c4e527bc83cd10d8e048d421..f19b9fc505acf0aa444d3d38ef23cbb88065a715 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -217,6 +217,9 @@ class TensorRTEngine {
                  const ShapeMapType min_input_shape = {},
                  const ShapeMapType max_input_shape = {},
                  const ShapeMapType optim_input_shape = {},
+                 const ShapeMapType min_shape_tensor = {},
+                 const ShapeMapType max_shape_tensor = {},
+                 const ShapeMapType optim_shape_tensor = {},
                  bool disable_trt_plugin_fp16 = false,
                  phi::DataType model_precision = phi::DataType::FLOAT32,
                  nvinfer1::ILogger& logger = NaiveLogger::Global())
@@ -228,6 +231,9 @@ class TensorRTEngine {
         min_input_shape_(min_input_shape),
         max_input_shape_(max_input_shape),
         optim_input_shape_(optim_input_shape),
+        min_shape_tensor_(min_shape_tensor),
+        max_shape_tensor_(max_shape_tensor),
+        optim_shape_tensor_(optim_shape_tensor),
         disable_trt_plugin_fp16_(disable_trt_plugin_fp16),
         model_precision_(model_precision),
         logger_(logger) {
@@ -443,6 +449,9 @@ class TensorRTEngine {
   ShapeMapType min_input_shape() { return min_input_shape_; }
   ShapeMapType max_input_shape() { return max_input_shape_; }
   ShapeMapType optim_input_shape() { return optim_input_shape_; }
+  ShapeMapType min_shape_tensor() { return min_shape_tensor_; }
+  ShapeMapType max_shape_tensor() { return max_shape_tensor_; }
+  ShapeMapType optim_shape_tensor() { return optim_shape_tensor_; }

   bool AdjustDynamicShapeRange(const ShapeMapType& runtime_input_shape,
                                std::vector<std::string>* changed) {
@@ -641,6 +650,9 @@ class TensorRTEngine {
   ShapeMapType min_input_shape_;
   ShapeMapType max_input_shape_;
   ShapeMapType optim_input_shape_;
+  ShapeMapType min_shape_tensor_;
+  ShapeMapType max_shape_tensor_;
+  ShapeMapType optim_shape_tensor_;
   bool disable_trt_plugin_fp16_{false};
   phi::DataType model_precision_{phi::DataType::FLOAT32};
   bool use_varseqlen_{false};
@@ -741,6 +753,9 @@ class TRTEngineManager {
       const std::map<std::string, std::vector<int>> min_input_shape = {},
       const std::map<std::string, std::vector<int>> max_input_shape = {},
       const std::map<std::string, std::vector<int>> optim_input_shape = {},
+      const std::map<std::string, std::vector<int>> min_shape_tensor = {},
+      const std::map<std::string, std::vector<int>> max_shape_tensor = {},
+      const std::map<std::string, std::vector<int>> optim_shape_tensor = {},
       bool disable_trt_plugin_fp16 = false,
       phi::DataType model_precision = phi::DataType::FLOAT32,
       nvinfer1::ILogger& logger = NaiveLogger::Global()) {
@@ -752,6 +767,9 @@ class TRTEngineManager {
                                   min_input_shape,
                                   max_input_shape,
                                   optim_input_shape,
+                                  min_shape_tensor,
+                                  max_shape_tensor,
+                                  optim_shape_tensor,
                                   disable_trt_plugin_fp16,
                                   model_precision,
                                   logger);
diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
index 6a253d2815941efb16e52676513be9a3ccfb32a4..ddf0ec5dd335e11ca12c5117750ae8cdf9cc6012 100644
--- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
+++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
@@ -31,6 +31,136 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {

+class TensorRTDynamicShapeValueEngineTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    ctx_ = new phi::GPUContext(platform::CUDAPlace(0));
+    ctx_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                           .GetAllocator(platform::CUDAPlace(0), ctx_->stream())
+                           .get());
+    ctx_->SetHostAllocator(
+        paddle::memory::allocation::AllocatorFacade::Instance()
+            .GetAllocator(paddle::platform::CPUPlace())
+            .get());
+    ctx_->SetZeroAllocator(
+        paddle::memory::allocation::AllocatorFacade::Instance()
+            .GetZeroAllocator(platform::CUDAPlace(0))
+            .get());
+    ctx_->SetPinnedAllocator(
+        paddle::memory::allocation::AllocatorFacade::Instance()
+            .GetAllocator(paddle::platform::CUDAPinnedPlace())
+            .get());
+    ctx_->PartialInitWithAllocator();
+
+    std::map<std::string, std::vector<int>> min_input_shape = {
+        {"input", {1, 32}}};
+    std::map<std::string, std::vector<int>> max_input_shape = {
+        {"input", {18, 32}}};
+    std::map<std::string, std::vector<int>> optim_input_shape = {
+        {"input", {18, 32}}};
+    std::map<std::string, std::vector<int>> min_input_value = {
+        {"shape", {1, 8, 4}}};
+    std::map<std::string, std::vector<int>> max_input_value = {
+        {"shape", {18, 8, 4}}};
+    std::map<std::string, std::vector<int>> optim_input_value = {
+        {"shape", {18, 8, 4}}};
+    engine_ = new TensorRTEngine(16,
+                                 1 << 10,
+                                 AnalysisConfig::Precision::kFloat32,
+                                 nullptr,
+                                 0,
+                                 min_input_shape,
+                                 max_input_shape,
+                                 optim_input_shape,
+                                 min_input_value,
+                                 max_input_value,
+                                 optim_input_value,
+                                 false,
+                                 phi::DataType::FLOAT32,
+                                 NaiveLogger::Global());
+    engine_->InitNetwork();
+  }
+
+  void TearDown() override {
+    if (engine_) {
+      delete engine_;
+      engine_ = nullptr;
+    }
+  }
+
+  void PrepareInputOutput(const std::vector<float> &input,
+                          std::vector<int> output_shape) {
+    paddle::framework::TensorFromVector(input, *ctx_, &input_);
+    output_.Resize(phi::make_ddim(output_shape));
+  }
+  void PrepareShapeInput(const std::vector<int> &input) {
+    paddle::framework::TensorFromVector(input, *ctx_, &shape_);
+  }
+  void GetOutput(std::vector<float> *output) {
+    paddle::framework::TensorToVector(output_, *ctx_, output);
+  }
+
+ protected:
+  framework::LoDTensor input_;
+  framework::LoDTensor shape_;
+  framework::LoDTensor output_;
+  TensorRTEngine *engine_;
+  phi::GPUContext *ctx_;
+};
+
+TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
+  std::vector<void *> buffers(3);
+  std::cout << "with_dynamic_shape: " << engine_->with_dynamic_shape()
+            << std::endl;
+  auto *x = engine_->DeclareInput(
+      "input", nvinfer1::DataType::kFLOAT, nvinfer1::Dims2{-1, 32});
+  nvinfer1::Dims shape_dim;
+  shape_dim.nbDims = 1;
+  shape_dim.d[0] = 3;
+  auto *shape =
+      engine_->DeclareInput("shape", nvinfer1::DataType::kINT32, shape_dim);
+  auto layer = engine_->network()->addShuffle(*x);
+  layer->setInput(1, *shape);
+  PADDLE_ENFORCE_NOT_NULL(
+      layer,
+      platform::errors::InvalidArgument("TRT shuffle layer building failed."));
+  engine_->DeclareOutput(layer, 0, "y");
+  engine_->FreezeNetwork();
+  ASSERT_EQ(engine_->engine()->getNbBindings(), 3);
+
+  std::vector<float> x_v(8 * 32);
+  for (int i = 0; i < 8 * 32; i++) {
+    x_v[i] = i % (8 * 32);
+  }
+
+  std::vector<int> shape_v = {8, 8, 4};
+  PrepareInputOutput(x_v, {8, 8, 4});
+  PrepareShapeInput(shape_v);
+  engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32});
+  engine_->context()->setBindingDimensions(1, shape_dim);
+  engine_->context()->setInputShapeBinding(1, shape_v.data());
+
+  auto *x_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
+  auto *shape_gpu_data = shape_.mutable_data<int>(ctx_->GetPlace());
+  auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());
+
+  buffers[0] = reinterpret_cast<void *>(x_gpu_data);
+  buffers[1] = reinterpret_cast<void *>(shape_gpu_data);
+  buffers[2] = reinterpret_cast<void *>(y_gpu_data);
+
+  engine_->Execute(-1, &buffers, ctx_->stream());
+  cudaStreamSynchronize(ctx_->stream());
+  std::vector<float> y_cpu;
+  GetOutput(&y_cpu);
+  ASSERT_EQ(y_cpu[0], 0);
+  ASSERT_EQ(y_cpu[1], 1);
+  auto dims = engine_->context()->getBindingDimensions(2);
+  ASSERT_EQ(dims.nbDims, 3);
+  ASSERT_EQ(dims.d[0], 8);
+  ASSERT_EQ(dims.d[1], 8);
+  ASSERT_EQ(dims.d[2], 4);
+}
+
 class TensorRTDynamicEngineTest : public ::testing::Test {
  protected:
   void SetUp() override {
@@ -67,6 +198,9 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
                                  min_input_shape,
                                  max_input_shape,
                                  optim_input_shape,
+                                 std::map<std::string, std::vector<int>>(),
+                                 std::map<std::string, std::vector<int>>(),
+                                 std::map<std::string, std::vector<int>>(),
                                  false,
                                  phi::DataType::FLOAT32,
                                  NaiveLogger::Global());
@@ -241,6 +375,9 @@ class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test {
                                  min_input_shape,
                                  max_input_shape,
                                  optim_input_shape,
+                                 std::map<std::string, std::vector<int>>(),
+                                 std::map<std::string, std::vector<int>>(),
+                                 std::map<std::string, std::vector<int>>(),
                                  false,
                                  phi::DataType::FLOAT32,
                                  NaiveLogger::Global());
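
The test also documents the runtime contract under the pre-8.5 binding-index API: a shape-tensor input needs both its binding dimensions and its element values before execution is legal. Condensed, with context and shape_dim as in the test above:

  // Binding index 1 is the "shape" input declared in the test.
  int32_t shape_vals[3] = {8, 8, 4};
  context->setBindingDimensions(1, shape_dim);   // dims of the 1-D tensor
  context->setInputShapeBinding(1, shape_vals);  // its element values
  // enqueueV2 is only valid once all dynamic inputs are fully specified:
  assert(context->allInputShapesSpecified());
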
diff --git a/paddle/fluid/inference/utils/io_utils.cc b/paddle/fluid/inference/utils/io_utils.cc
index e45e3a1035fe5d6d9b0701808644f09b2ababa70..ef342a4eefce3dbb7846c74ace894cabb508694b 100644
--- a/paddle/fluid/inference/utils/io_utils.cc
+++ b/paddle/fluid/inference/utils/io_utils.cc
@@ -182,7 +182,10 @@ void SerializeShapeRangeInfo(
     const std::string &path,
     const std::map<std::string, std::vector<int32_t>> &min_shape,
     const std::map<std::string, std::vector<int32_t>> &max_shape,
-    const std::map<std::string, std::vector<int32_t>> &opt_shape) {
+    const std::map<std::string, std::vector<int32_t>> &opt_shape,
+    const std::map<std::string, std::vector<int32_t>> &min_value,
+    const std::map<std::string, std::vector<int32_t>> &max_value,
+    const std::map<std::string, std::vector<int32_t>> &opt_value) {
   paddle::inference::proto::ShapeRangeInfos shape_range_infos;
   for (auto it : min_shape) {
     auto *s = shape_range_infos.add_shape_range_info();
@@ -192,10 +195,18 @@ void SerializeShapeRangeInfo(
       s->add_max_shape(max_shape.at(it.first)[i]);
       s->add_opt_shape(opt_shape.at(it.first)[i]);
     }
+    // If it.first names a shape tensor, also record its observed values.
+    if (min_value.count(it.first)) {
+      for (size_t i = 0; i < min_value.at(it.first).size(); ++i) {
+        s->add_min_value(min_value.at(it.first)[i]);
+        s->add_max_value(max_value.at(it.first)[i]);
+        s->add_opt_value(opt_value.at(it.first)[i]);
+      }
+    }
   }
-
   inference::SerializeShapeRangeInfo(path, shape_range_infos);
 }
+
 void DeserializeShapeRangeInfo(
     const std::string &path, paddle::inference::proto::ShapeRangeInfos *info) {
   int fd = open(path.c_str(), O_RDONLY);
@@ -213,7 +224,10 @@ void DeserializeShapeRangeInfo(
     const std::string &path,
     std::map<std::string, std::vector<int32_t>> *min_shape,
     std::map<std::string, std::vector<int32_t>> *max_shape,
-    std::map<std::string, std::vector<int32_t>> *opt_shape) {
+    std::map<std::string, std::vector<int32_t>> *opt_shape,
+    std::map<std::string, std::vector<int32_t>> *min_value,
+    std::map<std::string, std::vector<int32_t>> *max_value,
+    std::map<std::string, std::vector<int32_t>> *opt_value) {
   paddle::inference::proto::ShapeRangeInfos shape_range_infos;
   DeserializeShapeRangeInfo(path, &shape_range_infos);
   for (int i = 0; i < shape_range_infos.shape_range_info_size(); ++i) {
@@ -236,6 +250,26 @@ void DeserializeShapeRangeInfo(
       opt_shape->insert(std::make_pair(name, tmp));
     }
   }
+  for (int i = 0; i < shape_range_infos.shape_range_info_size(); ++i) {
+    auto info = shape_range_infos.shape_range_info(i);
+    auto name = info.name();
+    if (min_value->count(name) || max_value->count(name) ||
+        opt_value->count(name)) {
+      continue;
+    } else {
+      std::vector<int32_t> tmp(info.min_value_size());
+      for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.min_value(k);
+      min_value->insert(std::make_pair(name, tmp));
+
+      tmp.resize(info.max_value_size());
+      for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.max_value(k);
+      max_value->insert(std::make_pair(name, tmp));
+
+      tmp.resize(info.opt_value_size());
+      for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.opt_value(k);
+      opt_value->insert(std::make_pair(name, tmp));
+    }
+  }
 }

 void UpdateShapeRangeInfo(
@@ -264,6 +298,7 @@ void UpdateShapeRangeInfo(
       }
     }
   }
+
   inference::SerializeShapeRangeInfo(path, shape_range_infos);
 }
diff --git a/paddle/fluid/inference/utils/io_utils.h b/paddle/fluid/inference/utils/io_utils.h
index 682bbdef05edcca79456b3e134d23caab73697a8..64d6b3be4d94cb36593389c2b5df5650ff298083 100644
--- a/paddle/fluid/inference/utils/io_utils.h
+++ b/paddle/fluid/inference/utils/io_utils.h
@@ -42,23 +42,22 @@ void SerializePDTensorsToFile(const std::string& path,
                               const std::vector<PaddleTensor>& tensors);
 void DeserializePDTensorsToFile(const std::string& path,
                                 std::vector<PaddleTensor>* tensors);
-
-void SerializeShapeRangeInfo(
-    const std::string& path,
-    const paddle::inference::proto::ShapeRangeInfos& info);
 void SerializeShapeRangeInfo(
     const std::string& path,
     const std::map<std::string, std::vector<int32_t>>& min_shape,
     const std::map<std::string, std::vector<int32_t>>& max_shape,
-    const std::map<std::string, std::vector<int32_t>>& opt_shape);
-void DeserializeShapeRangeInfo(const std::string& path,
-                               paddle::inference::proto::ShapeRangeInfos* info);
+    const std::map<std::string, std::vector<int32_t>>& opt_shape,
+    const std::map<std::string, std::vector<int32_t>>& min_value,
+    const std::map<std::string, std::vector<int32_t>>& max_value,
+    const std::map<std::string, std::vector<int32_t>>& opt_value);
 void DeserializeShapeRangeInfo(
     const std::string& path,
     std::map<std::string, std::vector<int32_t>>* min_shape,
     std::map<std::string, std::vector<int32_t>>* max_shape,
-    std::map<std::string, std::vector<int32_t>>* opt_shape);
-
+    std::map<std::string, std::vector<int32_t>>* opt_shape,
+    std::map<std::string, std::vector<int32_t>>* min_value,
+    std::map<std::string, std::vector<int32_t>>* max_value,
+    std::map<std::string, std::vector<int32_t>>* opt_value);
 void UpdateShapeRangeInfo(
     const std::string& path,
     const std::map<std::string, std::vector<int32_t>>& min_shape,
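
Assuming the range file is written in protobuf text format (the human-readable .pbtxt that Paddle's shape-range tooling produces), a record for a 3-element shape tensor would look roughly like this, with illustrative values:

  shape_range_info {
    name: "shape"
    min_shape: 3   # the tensor itself is 1-D with 3 elements
    max_shape: 3
    opt_shape: 3
    min_value: 1
    min_value: 8
    min_value: 4
    max_value: 18
    max_value: 8
    max_value: 4
    opt_value: 18
    opt_value: 8
    opt_value: 4
  }
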
diff --git a/paddle/fluid/inference/utils/io_utils_tester.cc b/paddle/fluid/inference/utils/io_utils_tester.cc
index 7707140fb9762efaab1308fc0ba64ee04f67c0b9..812c22aa67d2eef8b3e7989461212a167ac52ab7 100644
--- a/paddle/fluid/inference/utils/io_utils_tester.cc
+++ b/paddle/fluid/inference/utils/io_utils_tester.cc
@@ -100,28 +100,48 @@ TEST(infer_io_utils, tensors) {

 TEST(shape_info_io, read_and_write) {
   const std::string path = "test_shape_info_io";
   std::map<std::string, std::vector<int32_t>> min_shape, max_shape, opt_shape;
+  std::map<std::string, std::vector<int32_t>> min_value, max_value, opt_value;
   min_shape.insert(
       std::make_pair("test1", std::vector<int32_t>{1, 3, 112, 112}));
   max_shape.insert(
       std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
   opt_shape.insert(
       std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
+  min_value.insert(
+      std::make_pair("test1", std::vector<int32_t>{1, 3, 112, 112}));
+  max_value.insert(
+      std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
+  opt_value.insert(
+      std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
   paddle::inference::SerializeShapeRangeInfo(
-      path, min_shape, max_shape, opt_shape);
+      path, min_shape, max_shape, opt_shape, min_value, max_value, opt_value);
   min_shape.clear();
   max_shape.clear();
   opt_shape.clear();
+  min_value.clear();
+  max_value.clear();
+  opt_value.clear();
   opt_shape.insert(
       std::make_pair("test2", std::vector<int32_t>{1, 3, 224, 224}));
-  paddle::inference::DeserializeShapeRangeInfo(
-      path, &min_shape, &max_shape, &opt_shape);
+  paddle::inference::DeserializeShapeRangeInfo(path,
+                                               &min_shape,
+                                               &max_shape,
+                                               &opt_shape,
+                                               &min_value,
+                                               &max_value,
+                                               &opt_value);
   min_shape.insert(std::make_pair("test1", std::vector<int32_t>{1, 3, 56, 56}));
   std::vector<std::string> names{"test1"};
   paddle::inference::UpdateShapeRangeInfo(
       path, min_shape, max_shape, opt_shape, names);

-  ASSERT_THROW(paddle::inference::DeserializeShapeRangeInfo(
-                   "no_exists_file", &min_shape, &max_shape, &opt_shape);
+  ASSERT_THROW(paddle::inference::DeserializeShapeRangeInfo("no_exists_file",
+                                                            &min_shape,
+                                                            &max_shape,
+                                                            &opt_shape,
+                                                            &min_value,
+                                                            &max_value,
+                                                            &opt_value);
                , paddle::platform::EnforceNotMet);
 }
diff --git a/paddle/fluid/inference/utils/shape_range_info.proto b/paddle/fluid/inference/utils/shape_range_info.proto
index fcb2d635b52261ed810c18536ec8ad22dd88d575..53f018cb5934899f537a54e8e285b0079297b4b0 100644
--- a/paddle/fluid/inference/utils/shape_range_info.proto
+++ b/paddle/fluid/inference/utils/shape_range_info.proto
@@ -23,6 +23,9 @@ message ShapeRangeInfos {
     repeated int32 min_shape = 2;
     repeated int32 max_shape = 3;
     repeated int32 opt_shape = 4;
+    repeated int32 min_value = 5;
+    repeated int32 max_value = 6;
+    repeated int32 opt_value = 7;
   }

   repeated ShapeRangeInfo shape_range_info = 1;
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
index 2acb33def7d9ed4071ae6452fc9b6a4dccfc3270..178c0fc22a57a91cd163f6005f5ac7498b69cc8a 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -554,6 +554,18 @@ class TensorRTEngineOp : public framework::OperatorBase {
 #if IS_TRT_VERSION_GE(6000)
         trt_context->setBindingDimensions(
             bind_index, inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
+        // If x is a shape tensor, we also need to call setInputShapeBinding.
+        if (engine->engine()->isShapeBinding(bind_index) &&
+            engine->engine()->bindingIsInput(bind_index)) {
+          std::vector<int> shape_v(t.numel());
+          paddle::memory::Copy(platform::CPUPlace(),
+                               shape_v.data(),
+                               platform::CUDAPlace(),
+                               t.data<int32_t>(),
+                               t.numel() * sizeof(int),
+                               nullptr);
+          trt_context->setInputShapeBinding(bind_index, shape_v.data());
+        }
 #endif
       }
       runtime_batch = t_shape[0];
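
Taken together, the patch completes the tuned-dynamic-shape workflow for models whose reshapes are driven by shape tensors. A usage sketch with Paddle's existing AnalysisConfig API; the file name is illustrative:

  // Pass 1: run representative inputs once, recording tensor shapes and
  // shape-tensor values into the range-info file.
  paddle::AnalysisConfig collect_config;
  // ... set model/params paths, EnableUseGpu(...), etc. ...
  collect_config.CollectShapeRangeInfo("shape_range_info.pbtxt");
  // Run the predictor over calibration inputs here.

  // Pass 2: build the TensorRT engine from the tuned ranges; the second
  // argument permits rebuilding at runtime for out-of-range inputs.
  paddle::AnalysisConfig deploy_config;
  deploy_config.EnableTunedTensorRtDynamicShape("shape_range_info.pbtxt", true);
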