Unverified commit 5a44c124, authored by zhoutianzi666, committed by GitHub

support shape tensor as the input of trt-subgraph (#47066)

Parent 5b642140
......@@ -77,6 +77,15 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("optim_input_shape",
new std::map<std::string, std::vector<int>>(
argument->optim_input_shape()));
      // Currently, shape tensor values are not set explicitly by the user;
      // they are collected through the API CollectShapeRangeInfo.
pass->Set("max_shape_tensor",
new std::map<std::string, std::vector<int>>());
pass->Set("min_shape_tensor",
new std::map<std::string, std::vector<int>>());
pass->Set("optim_shape_tensor",
new std::map<std::string, std::vector<int>>());
// tuned trt dynamic_shape
pass->Set("trt_tuned_dynamic_shape",
new bool(argument->tensorrt_tuned_dynamic_shape()));
......
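
The three new pass attributes parallel the existing min/max/optim_input_shape attributes, but they hold observed value ranges of shape tensors rather than tensor dimensions. As an illustration only (the tensor name "shape" and the numbers are taken from the unit test further below, not prescribed by the pass), a tuned map might look like:

// Illustrative contents: after tuning, the shape tensor named "shape" was
// observed with values ranging from {1, 8, 4} to {18, 8, 4}.
std::map<std::string, std::vector<int>> min_shape_tensor{{"shape", {1, 8, 4}}};
std::map<std::string, std::vector<int>> max_shape_tensor{{"shape", {18, 8, 4}}};
std::map<std::string, std::vector<int>> optim_shape_tensor{{"shape", {18, 8, 4}}};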
......@@ -319,6 +319,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
auto opt_input_shape =
Get<std::map<std::string, std::vector<int>>>("optim_input_shape");
auto min_shape_tensor =
Get<std::map<std::string, std::vector<int>>>("min_shape_tensor");
auto max_shape_tensor =
Get<std::map<std::string, std::vector<int>>>("max_shape_tensor");
auto opt_shape_tensor =
Get<std::map<std::string, std::vector<int>>>("optim_shape_tensor");
auto allow_build_at_runtime = Get<bool>("trt_allow_build_at_runtime");
auto shape_range_info_path = Get<std::string>("trt_shape_range_info_path");
auto trt_tuned_dynamic_shape = Get<bool>("trt_tuned_dynamic_shape");
......@@ -328,7 +335,10 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
inference::DeserializeShapeRangeInfo(shape_range_info_path,
&min_input_shape,
&max_input_shape,
                                         &opt_input_shape,
&min_shape_tensor,
&max_shape_tensor,
&opt_shape_tensor);
}
// The following procedure is used to rename all the intermediate
......@@ -513,6 +523,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
min_input_shape,
max_input_shape,
opt_input_shape,
min_shape_tensor,
max_shape_tensor,
opt_shape_tensor,
disable_trt_plugin_fp16,
static_cast<phi::DataType>(Get<int>("model_precision")));
trt_engine->SetUseOSS(Get<bool>("use_varseqlen"));
......
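
For reference, a minimal sketch of loading a tuned shape-range file with the extended six-map signature used above; the file name here is hypothetical, and in the pass the real path comes from trt_shape_range_info_path:

std::map<std::string, std::vector<int32_t>> min_shape, max_shape, opt_shape;
std::map<std::string, std::vector<int32_t>> min_value, max_value, opt_value;
paddle::inference::DeserializeShapeRangeInfo("shape_range_info.pbtxt",
                                             &min_shape,
                                             &max_shape,
                                             &opt_shape,
                                             &min_value,
                                             &max_value,
                                             &opt_value);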
......@@ -1749,10 +1749,39 @@ void AnalysisPredictor::CollectShapeRangeInfo() {
if (!var->IsType<framework::LoDTensor>()) {
continue;
}
    auto tensor = var->Get<framework::LoDTensor>();
    framework::DDim dim = tensor.dims();
std::vector<int32_t> shape(dim.size());
for (size_t i = 0; i < shape.size(); ++i) shape[i] = dim[i];
shape_info_[name].emplace_back(shape);
    // We also need to collect the value range of shape tensors for Paddle-TRT.
    // Note that this way of identifying shape tensors relies on the
    // assumption that every shape tensor in the model has at most 7 elements.
    // The heuristic may misclassify some tensors, but that is harmless here.
auto is_shape_tensor = tensor.numel() <= 7 && tensor.numel() >= 1;
if (tensor.dtype() == paddle::experimental::DataType::INT32 &&
is_shape_tensor) {
std::vector<int> int32_host(tensor.numel());
if (tensor.place() == platform::CPUPlace()) {
paddle::memory::Copy(platform::CPUPlace(),
int32_host.data(),
platform::CPUPlace(),
tensor.data<int>(),
tensor.numel() * sizeof(int));
} else if (tensor.place() == platform::CUDAPlace()) {
#if defined(PADDLE_WITH_CUDA)
paddle::memory::Copy(platform::CPUPlace(),
int32_host.data(),
platform::CUDAPlace(),
tensor.data<int>(),
tensor.numel() * sizeof(int),
nullptr);
#endif
}
shape_tensor_value_[name].emplace_back(int32_host);
}
}
}
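
The detection logic above can be read as a single predicate; a sketch under the same assumption (the helper name is ours, not part of the patch):

// A tensor is treated as a candidate shape tensor iff it is INT32 and has
// between 1 and 7 elements; false positives are tolerated by design.
bool LooksLikeShapeTensor(const framework::LoDTensor &t) {
  return t.dtype() == paddle::experimental::DataType::INT32 &&
         t.numel() >= 1 && t.numel() <= 7;
}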
......@@ -1760,43 +1789,61 @@ void AnalysisPredictor::StatisticShapeRangeInfo() {
std::map<std::string, std::vector<int32_t>> min_shapes;
std::map<std::string, std::vector<int32_t>> max_shapes;
std::map<std::string, std::vector<int32_t>> opt_shapes;
for (auto it : shape_info_) {
auto name = it.first;
auto shapes = it.second;
std::vector<int32_t> min_shape(shapes[0].begin(), shapes[0].end());
std::vector<int32_t> max_shape(shapes[0].begin(), shapes[0].end());
std::vector<int32_t> opt_shape(shapes[0].begin(), shapes[0].end());
auto ShapeMaxFreq = [](const std::map<int32_t, int32_t> &m) -> int32_t {
std::vector<std::pair<int32_t, int32_t>> counter;
for (auto &it : m) counter.push_back(it);
std::sort(
counter.begin(),
counter.end(),
[](std::pair<int32_t, int32_t> &a, std::pair<int32_t, int32_t> &b) {
return a.second > b.second;
});
return counter[0].first;
};
for (size_t d = 0; d < shapes[0].size(); ++d) {
std::map<int32_t, int32_t> counter;
for (size_t i = 0; i < shapes.size(); ++i) {
counter[shapes[i][d]] += 1;
if (shapes[i][d] < min_shape[d]) min_shape[d] = shapes[i][d];
if (shapes[i][d] > max_shape[d]) max_shape[d] = shapes[i][d];
}
opt_shape[d] = ShapeMaxFreq(counter);
}
min_shapes[name] = min_shape;
max_shapes[name] = max_shape;
opt_shapes[name] = opt_shape;
}
std::map<std::string, std::vector<int32_t>> min_values;
std::map<std::string, std::vector<int32_t>> max_values;
std::map<std::string, std::vector<int32_t>> opt_values;
auto extract_min_max_opt =
[](std::map<std::string, std::vector<int32_t>> &min_data,
decltype(min_data) max_data,
decltype(min_data) opt_data,
decltype(shape_info_) shape_data) {
for (auto it : shape_data) {
auto name = it.first;
auto shapes = it.second;
std::vector<int32_t> min_shape(shapes[0].begin(), shapes[0].end());
std::vector<int32_t> max_shape(shapes[0].begin(), shapes[0].end());
std::vector<int32_t> opt_shape(shapes[0].begin(), shapes[0].end());
auto ShapeMaxFreq =
[](const std::map<int32_t, int32_t> &m) -> int32_t {
std::vector<std::pair<int32_t, int32_t>> counter;
for (auto &it : m) counter.push_back(it);
std::sort(counter.begin(),
counter.end(),
[](std::pair<int32_t, int32_t> &a,
std::pair<int32_t, int32_t> &b) {
return a.second > b.second;
});
return counter[0].first;
};
for (size_t d = 0; d < shapes[0].size(); ++d) {
std::map<int32_t, int32_t> counter;
for (size_t i = 0; i < shapes.size(); ++i) {
counter[shapes[i][d]] += 1;
if (shapes[i][d] < min_shape[d]) min_shape[d] = shapes[i][d];
if (shapes[i][d] > max_shape[d]) max_shape[d] = shapes[i][d];
}
opt_shape[d] = ShapeMaxFreq(counter);
}
min_data[name] = min_shape;
max_data[name] = max_shape;
opt_data[name] = opt_shape;
}
};
extract_min_max_opt(min_shapes, max_shapes, opt_shapes, shape_info_);
extract_min_max_opt(min_values, max_values, opt_values, shape_tensor_value_);
inference::SerializeShapeRangeInfo(config_.shape_range_info_path(),
min_shapes,
max_shapes,
opt_shapes,
min_values,
max_values,
opt_values);
}
bool AnalysisPredictor::LoadProgramDesc() {
......
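
To see what extract_min_max_opt computes, here is a self-contained toy version (the data is made up) that keeps, per dimension, the minimum, the maximum, and the most frequently observed value:

#include <algorithm>
#include <map>
#include <vector>

int main() {
  // Three recorded shapes for one tensor.
  std::vector<std::vector<int>> shapes = {{1, 32}, {4, 32}, {4, 32}};
  std::vector<int> mn(shapes[0]), mx(shapes[0]), opt(shapes[0]);
  for (size_t d = 0; d < shapes[0].size(); ++d) {
    std::map<int, int> freq;
    for (auto &s : shapes) {
      freq[s[d]]++;
      mn[d] = std::min(mn[d], s[d]);
      mx[d] = std::max(mx[d], s[d]);
    }
    // Most frequent value of this dimension becomes the "opt" entry.
    opt[d] = std::max_element(freq.begin(), freq.end(),
                              [](auto &a, auto &b) { return a.second < b.second; })
                 ->first;
  }
  // mn == {1, 32}, mx == {4, 32}, opt == {4, 32}
}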
......@@ -515,6 +515,7 @@ class AnalysisPredictor : public PaddlePredictor {
bool status_is_cloned_{false};
std::map<std::string, std::vector<std::vector<int32_t>>> shape_info_;
std::map<std::string, std::vector<std::vector<int32_t>>> shape_tensor_value_;
static int clone_num_;
bool private_context_{false};
......
......@@ -231,6 +231,35 @@ void TensorRTEngine::FreezeNetwork() {
nvinfer1::OptProfileSelector::kOPT,
Vec2TRT_Dims(optim_input_shape_[input.first], input.first, true));
}
for (int input_id = 0; input_id < network()->getNbInputs(); input_id++) {
auto input_name = network()->getInput(input_id)->getName();
if (!itensor_map_.count(input_name)) continue;
if (!GetITensor(input_name)->isShapeTensor()) continue;
PADDLE_ENFORCE_EQ(min_shape_tensor_.count(input_name) &&
max_shape_tensor_.count(input_name) &&
optim_shape_tensor_.count(input_name),
true,
platform::errors::InvalidArgument(
"Fail to find min/max/optim shape value for TRT "
"network's shape tensor input named %s.",
input_name));
auto min_vec = min_shape_tensor_.at(input_name);
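      // TensorRT requires the same number of values for kMIN/kMAX/kOPT of a
      // shape tensor, so min_vec.size() is reused for all three calls below.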
optim_profiles_[i]->setShapeValues(input_name,
nvinfer1::OptProfileSelector::kMIN,
min_vec.data(),
min_vec.size());
optim_profiles_[i]->setShapeValues(input_name,
nvinfer1::OptProfileSelector::kMAX,
max_shape_tensor_[input_name].data(),
min_vec.size());
optim_profiles_[i]->setShapeValues(
input_name,
nvinfer1::OptProfileSelector::kOPT,
optim_shape_tensor_[input_name].data(),
min_vec.size());
}
infer_builder_config_->addOptimizationProfile(optim_profiles_[i]);
}
if (WithFp16() && disable_trt_plugin_fp16()) {
......
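
Beneath the Paddle wrapper, these calls map onto TensorRT's optimization-profile API. A standalone sketch, assuming an existing nvinfer1::IBuilder* builder and nvinfer1::IBuilderConfig* config (all names and values here are illustrative):

// A shape-tensor input needs per-profile value ranges, not just dimensions.
nvinfer1::IOptimizationProfile *profile = builder->createOptimizationProfile();
const int32_t min_v[] = {1, 8, 4}, max_v[] = {18, 8, 4}, opt_v[] = {18, 8, 4};
profile->setShapeValues("shape", nvinfer1::OptProfileSelector::kMIN, min_v, 3);
profile->setShapeValues("shape", nvinfer1::OptProfileSelector::kMAX, max_v, 3);
profile->setShapeValues("shape", nvinfer1::OptProfileSelector::kOPT, opt_v, 3);
config->addOptimizationProfile(profile);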
......@@ -214,6 +214,9 @@ class TensorRTEngine {
const ShapeMapType min_input_shape = {},
const ShapeMapType max_input_shape = {},
const ShapeMapType optim_input_shape = {},
const ShapeMapType min_shape_tensor = {},
const ShapeMapType max_shape_tensor = {},
const ShapeMapType optim_shape_tensor = {},
bool disable_trt_plugin_fp16 = false,
phi::DataType model_precision = phi::DataType::FLOAT32,
nvinfer1::ILogger& logger = NaiveLogger::Global())
......@@ -225,6 +228,9 @@ class TensorRTEngine {
min_input_shape_(min_input_shape),
max_input_shape_(max_input_shape),
optim_input_shape_(optim_input_shape),
min_shape_tensor_(min_shape_tensor),
max_shape_tensor_(max_shape_tensor),
optim_shape_tensor_(optim_shape_tensor),
disable_trt_plugin_fp16_(disable_trt_plugin_fp16),
model_precision_(model_precision),
logger_(logger) {
......@@ -530,6 +536,9 @@ class TensorRTEngine {
ShapeMapType min_input_shape() { return min_input_shape_; }
ShapeMapType max_input_shape() { return max_input_shape_; }
ShapeMapType optim_input_shape() { return optim_input_shape_; }
ShapeMapType min_shape_tensor() { return min_shape_tensor_; }
ShapeMapType max_shape_tensor() { return max_shape_tensor_; }
ShapeMapType optim_shape_tensor() { return optim_shape_tensor_; }
bool AdjustDynamicShapeRange(const ShapeMapType& runtime_input_shape,
std::vector<std::string>* changed) {
......@@ -721,6 +730,9 @@ class TensorRTEngine {
ShapeMapType min_input_shape_;
ShapeMapType max_input_shape_;
ShapeMapType optim_input_shape_;
ShapeMapType min_shape_tensor_;
ShapeMapType max_shape_tensor_;
ShapeMapType optim_shape_tensor_;
bool disable_trt_plugin_fp16_{false};
phi::DataType model_precision_{phi::DataType::FLOAT32};
bool use_varseqlen_{false};
......@@ -812,6 +824,9 @@ class TRTEngineManager {
const std::map<std::string, std::vector<int>> min_input_shape = {},
const std::map<std::string, std::vector<int>> max_input_shape = {},
const std::map<std::string, std::vector<int>> optim_input_shape = {},
const std::map<std::string, std::vector<int>> min_shape_tensor = {},
const std::map<std::string, std::vector<int>> max_shape_tensor = {},
const std::map<std::string, std::vector<int>> optim_shape_tensor = {},
bool disable_trt_plugin_fp16 = false,
phi::DataType model_precision = phi::DataType::FLOAT32,
nvinfer1::ILogger& logger = NaiveLogger::Global()) {
......@@ -823,6 +838,9 @@ class TRTEngineManager {
min_input_shape,
max_input_shape,
optim_input_shape,
min_shape_tensor,
max_shape_tensor,
optim_shape_tensor,
disable_trt_plugin_fp16,
model_precision,
logger);
......
......@@ -31,6 +31,137 @@ namespace paddle {
namespace inference {
namespace tensorrt {
class TensorRTDynamicShapeValueEngineTest : public ::testing::Test {
protected:
void SetUp() override {
ctx_ = new phi::GPUContext(platform::CUDAPlace(0));
ctx_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(platform::CUDAPlace(0), ctx_->stream())
.get());
ctx_->SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
ctx_->SetZeroAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(platform::CUDAPlace(0))
.get());
ctx_->SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
.get());
ctx_->PartialInitWithAllocator();
std::map<std::string, std::vector<int>> min_input_shape = {
{"input", {1, 32}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{"input", {18, 32}}};
std::map<std::string, std::vector<int>> optim_input_shape = {
{"input", {18, 32}}};
std::map<std::string, std::vector<int>> min_input_value = {
{"shape", {1, 8, 4}}};
std::map<std::string, std::vector<int>> max_input_value = {
{"shape", {18, 8, 4}}};
std::map<std::string, std::vector<int>> optim_input_value = {
{"shape", {18, 8, 4}}};
engine_ = new TensorRTEngine(16,
1 << 10,
AnalysisConfig::Precision::kFloat32,
nullptr,
0,
min_input_shape,
max_input_shape,
optim_input_shape,
min_input_value,
max_input_value,
optim_input_value,
false,
phi::DataType::FLOAT32,
NaiveLogger::Global());
engine_->InitNetwork();
}
void TearDown() override {
if (engine_) {
delete engine_;
engine_ = nullptr;
}
}
void PrepareInputOutput(const std::vector<float> &input,
std::vector<int> output_shape) {
paddle::framework::TensorFromVector(input, *ctx_, &input_);
output_.Resize(phi::make_ddim(output_shape));
}
void PrepareShapeInput(const std::vector<int> &input) {
paddle::framework::TensorFromVector(input, *ctx_, &shape_);
}
void GetOutput(std::vector<float> *output) {
paddle::framework::TensorToVector(output_, *ctx_, output);
}
protected:
framework::LoDTensor input_;
framework::LoDTensor shape_;
framework::LoDTensor output_;
TensorRTEngine *engine_;
phi::GPUContext *ctx_;
};
TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
std::vector<void *> buffers(3);
std::cout << "with_dynamic_shape: " << engine_->with_dynamic_shape()
<< std::endl;
auto *x = engine_->DeclareInput(
"input", nvinfer1::DataType::kFLOAT, nvinfer1::Dims2{-1, 32});
nvinfer1::Dims shape_dim;
shape_dim.nbDims = 1;
shape_dim.d[0] = 3;
auto *shape =
engine_->DeclareInput("shape", nvinfer1::DataType::kINT32, shape_dim);
  auto layer = engine_->network()->addShuffle(*x);
  PADDLE_ENFORCE_NOT_NULL(
      layer,
      platform::errors::InvalidArgument("TRT shuffle layer building failed."));
  layer->setInput(1, *shape);
engine_->DeclareOutput(layer, 0, "y");
engine_->FreezeNetwork();
ASSERT_EQ(engine_->engine()->getNbBindings(), 3);
std::vector<float> x_v(8 * 32);
for (int i = 0; i < 8 * 32; i++) {
x_v[i] = i % (8 * 32);
}
std::vector<int> shape_v = {8, 8, 4};
PrepareInputOutput(x_v, {8, 8, 4});
PrepareShapeInput(shape_v);
engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32});
engine_->context()->setBindingDimensions(1, shape_dim);
engine_->context()->setInputShapeBinding(1, shape_v.data());
auto *x_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
auto *shape_gpu_data = shape_.mutable_data<int>(ctx_->GetPlace());
auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());
buffers[0] = reinterpret_cast<void *>(x_gpu_data);
buffers[1] = reinterpret_cast<void *>(shape_gpu_data);
buffers[2] = reinterpret_cast<void *>(y_gpu_data);
engine_->Execute(-1, &buffers, ctx_->stream());
cudaStreamSynchronize(ctx_->stream());
std::vector<float> y_cpu;
GetOutput(&y_cpu);
ASSERT_EQ(y_cpu[0], 0);
ASSERT_EQ(y_cpu[1], 1);
auto dims = engine_->context()->getBindingDimensions(2);
ASSERT_EQ(dims.nbDims, 3);
ASSERT_EQ(dims.d[0], 8);
ASSERT_EQ(dims.d[1], 8);
ASSERT_EQ(dims.d[2], 4);
return;
}
class TensorRTDynamicEngineTest : public ::testing::Test {
protected:
void SetUp() override {
......@@ -67,6 +198,9 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
min_input_shape,
max_input_shape,
optim_input_shape,
std::map<std::string, std::vector<int>>(),
std::map<std::string, std::vector<int>>(),
std::map<std::string, std::vector<int>>(),
false,
phi::DataType::FLOAT32,
NaiveLogger::Global());
......@@ -241,6 +375,9 @@ class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test {
min_input_shape,
max_input_shape,
optim_input_shape,
std::map<std::string, std::vector<int>>(),
std::map<std::string, std::vector<int>>(),
std::map<std::string, std::vector<int>>(),
false,
phi::DataType::FLOAT32,
NaiveLogger::Global());
......
......@@ -182,7 +182,10 @@ void SerializeShapeRangeInfo(
const std::string &path,
const std::map<std::string, std::vector<int32_t>> &min_shape,
const std::map<std::string, std::vector<int32_t>> &max_shape,
    const std::map<std::string, std::vector<int32_t>> &opt_shape,
const std::map<std::string, std::vector<int32_t>> &min_value,
const std::map<std::string, std::vector<int32_t>> &max_value,
const std::map<std::string, std::vector<int32_t>> &opt_value) {
paddle::inference::proto::ShapeRangeInfos shape_range_infos;
for (auto it : min_shape) {
auto *s = shape_range_infos.add_shape_range_info();
......@@ -192,10 +195,18 @@ void SerializeShapeRangeInfo(
s->add_max_shape(max_shape.at(it.first)[i]);
s->add_opt_shape(opt_shape.at(it.first)[i]);
}
    // If it.first is a shape tensor, also record its min/max/opt values.
if (min_value.count(it.first)) {
for (size_t i = 0; i < min_value.at(it.first).size(); ++i) {
s->add_min_value(min_value.at(it.first)[i]);
s->add_max_value(max_value.at(it.first)[i]);
s->add_opt_value(opt_value.at(it.first)[i]);
}
}
}
inference::SerializeShapeRangeInfo(path, shape_range_infos);
}
void DeserializeShapeRangeInfo(
const std::string &path, paddle::inference::proto::ShapeRangeInfos *info) {
int fd = open(path.c_str(), O_RDONLY);
......@@ -213,7 +224,10 @@ void DeserializeShapeRangeInfo(
const std::string &path,
std::map<std::string, std::vector<int32_t>> *min_shape,
std::map<std::string, std::vector<int32_t>> *max_shape,
    std::map<std::string, std::vector<int32_t>> *opt_shape,
std::map<std::string, std::vector<int32_t>> *min_value,
std::map<std::string, std::vector<int32_t>> *max_value,
std::map<std::string, std::vector<int32_t>> *opt_value) {
paddle::inference::proto::ShapeRangeInfos shape_range_infos;
DeserializeShapeRangeInfo(path, &shape_range_infos);
for (int i = 0; i < shape_range_infos.shape_range_info_size(); ++i) {
......@@ -236,6 +250,26 @@ void DeserializeShapeRangeInfo(
opt_shape->insert(std::make_pair(name, tmp));
}
}
for (int i = 0; i < shape_range_infos.shape_range_info_size(); ++i) {
auto info = shape_range_infos.shape_range_info(i);
auto name = info.name();
if (min_value->count(name) || max_value->count(name) ||
opt_value->count(name)) {
continue;
} else {
std::vector<int32_t> tmp(info.min_value_size());
for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.min_value(k);
min_value->insert(std::make_pair(name, tmp));
tmp.resize(info.max_value_size());
for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.max_value(k);
max_value->insert(std::make_pair(name, tmp));
tmp.resize(info.opt_value_size());
for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.opt_value(k);
opt_value->insert(std::make_pair(name, tmp));
}
}
}
void UpdateShapeRangeInfo(
......@@ -264,6 +298,7 @@ void UpdateShapeRangeInfo(
}
}
}
inference::SerializeShapeRangeInfo(path, shape_range_infos);
}
......
......@@ -42,23 +42,22 @@ void SerializePDTensorsToFile(const std::string& path,
const std::vector<PaddleTensor>& tensors);
void DeserializePDTensorsToFile(const std::string& path,
std::vector<PaddleTensor>* tensors);
void SerializeShapeRangeInfo(
const std::string& path,
const paddle::inference::proto::ShapeRangeInfos& info);
void SerializeShapeRangeInfo(
const std::string& path,
const std::map<std::string, std::vector<int32_t>>& min_shape,
const std::map<std::string, std::vector<int32_t>>& max_shape,
    const std::map<std::string, std::vector<int32_t>>& opt_shape,
    const std::map<std::string, std::vector<int32_t>>& min_value,
    const std::map<std::string, std::vector<int32_t>>& max_value,
    const std::map<std::string, std::vector<int32_t>>& opt_value);
void DeserializeShapeRangeInfo(const std::string& path,
                               paddle::inference::proto::ShapeRangeInfos* info);
void DeserializeShapeRangeInfo(
const std::string& path,
std::map<std::string, std::vector<int32_t>>* min_shape,
std::map<std::string, std::vector<int32_t>>* max_shape,
    std::map<std::string, std::vector<int32_t>>* opt_shape,
std::map<std::string, std::vector<int32_t>>* min_value,
std::map<std::string, std::vector<int32_t>>* max_value,
std::map<std::string, std::vector<int32_t>>* opt_value);
void UpdateShapeRangeInfo(
const std::string& path,
const std::map<std::string, std::vector<int32_t>>& min_shape,
......
......@@ -100,28 +100,48 @@ TEST(infer_io_utils, tensors) {
TEST(shape_info_io, read_and_write) {
const std::string path = "test_shape_info_io";
std::map<std::string, std::vector<int32_t>> min_shape, max_shape, opt_shape;
std::map<std::string, std::vector<int32_t>> min_value, max_value, opt_value;
min_shape.insert(
std::make_pair("test1", std::vector<int32_t>{1, 3, 112, 112}));
max_shape.insert(
std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
opt_shape.insert(
std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
min_value.insert(
std::make_pair("test1", std::vector<int32_t>{1, 3, 112, 112}));
max_value.insert(
std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
opt_value.insert(
std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
paddle::inference::SerializeShapeRangeInfo(
      path, min_shape, max_shape, opt_shape, min_value, max_value, opt_value);
min_shape.clear();
max_shape.clear();
opt_shape.clear();
min_value.clear();
max_value.clear();
opt_value.clear();
opt_shape.insert(
std::make_pair("test2", std::vector<int32_t>{1, 3, 224, 224}));
  paddle::inference::DeserializeShapeRangeInfo(path,
&min_shape,
&max_shape,
&opt_shape,
&min_value,
&max_value,
&opt_value);
min_shape.insert(std::make_pair("test1", std::vector<int32_t>{1, 3, 56, 56}));
std::vector<std::string> names{"test1"};
paddle::inference::UpdateShapeRangeInfo(
path, min_shape, max_shape, opt_shape, names);
  ASSERT_THROW(paddle::inference::DeserializeShapeRangeInfo("no_exists_file",
&min_shape,
&max_shape,
&opt_shape,
&min_value,
&max_value,
&opt_value);
, paddle::platform::EnforceNotMet);
}
......@@ -23,6 +23,9 @@ message ShapeRangeInfos {
repeated int32 min_shape = 2;
repeated int32 max_shape = 3;
repeated int32 opt_shape = 4;
repeated int32 min_value = 5;
repeated int32 max_value = 6;
repeated int32 opt_value = 7;
}
repeated ShapeRangeInfo shape_range_info = 1;
......
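
Given the message definition above, reading the new value fields back is straightforward; a short sketch using the generated protobuf accessors (the parsing itself is done by DeserializeShapeRangeInfo, as shown earlier):

paddle::inference::proto::ShapeRangeInfos infos;
// ... assume `infos` has been parsed from a serialized shape-range file ...
for (int i = 0; i < infos.shape_range_info_size(); ++i) {
  const auto &info = infos.shape_range_info(i);
  for (int k = 0; k < info.min_value_size(); ++k) {
    // Value range of the shape tensor named info.name() at position k.
    int32_t lo = info.min_value(k);
    int32_t hi = info.max_value(k);
  }
}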
......@@ -563,6 +563,18 @@ class TensorRTEngineOp : public framework::OperatorBase {
#if IS_TRT_VERSION_GE(6000)
trt_context->setBindingDimensions(
bind_index, inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
          // If this input x is a shape tensor, we need to call
          // setInputShapeBinding.
if (engine->engine()->isShapeBinding(bind_index) &&
engine->engine()->bindingIsInput(bind_index)) {
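          // Copy the shape tensor's values from device to host:
          // setInputShapeBinding expects host-side values.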
std::vector<int> shape_v(t.numel());
paddle::memory::Copy(platform::CPUPlace(),
shape_v.data(),
platform::CUDAPlace(),
t.data<int32_t>(),
t.numel() * sizeof(int),
nullptr);
trt_context->setInputShapeBinding(bind_index, shape_v.data());
}
#endif
}
runtime_batch = t_shape[0];
......