diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index e1fe856e3c03e892f1c8d2c11063136da7b33ab6..97ca7c37c7f0e7525a258c39c8b1147e22f10250 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -77,6 +77,15 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("optim_input_shape",
                 new std::map<std::string, std::vector<int>>(
                     argument->optim_input_shape()));
+      // Shape tensor values are not explicitly set by the user for now;
+      // they are collected at runtime through the CollectShapeRangeInfo API.
+      pass->Set("max_shape_tensor",
+                new std::map<std::string, std::vector<int>>());
+      pass->Set("min_shape_tensor",
+                new std::map<std::string, std::vector<int>>());
+      pass->Set("optim_shape_tensor",
+                new std::map<std::string, std::vector<int>>());
+
       // tuned trt dynamic_shape
       pass->Set("trt_tuned_dynamic_shape",
                 new bool(argument->tensorrt_tuned_dynamic_shape()));
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index 05d66d7fe2793d35513333f67fdc25b56557d439..9c63c95fd91ba68be578d98bffc4cb60362fc40e 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -317,6 +317,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   auto opt_input_shape =
       Get<std::map<std::string, std::vector<int>>>("optim_input_shape");

+  auto min_shape_tensor =
+      Get<std::map<std::string, std::vector<int>>>("min_shape_tensor");
+  auto max_shape_tensor =
+      Get<std::map<std::string, std::vector<int>>>("max_shape_tensor");
+  auto opt_shape_tensor =
+      Get<std::map<std::string, std::vector<int>>>("optim_shape_tensor");
+
   auto allow_build_at_runtime = Get<bool>("trt_allow_build_at_runtime");
   auto shape_range_info_path = Get<std::string>("trt_shape_range_info_path");
   auto trt_tuned_dynamic_shape = Get<bool>("trt_tuned_dynamic_shape");
@@ -326,7 +333,10 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
     inference::DeserializeShapeRangeInfo(shape_range_info_path,
                                          &min_input_shape,
                                          &max_input_shape,
-                                         &opt_input_shape);
+                                         &opt_input_shape,
+                                         &min_shape_tensor,
+                                         &max_shape_tensor,
+                                         &opt_shape_tensor);
   }

   // The following procedure is used to rename all the intermediate
@@ -511,6 +521,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
           min_input_shape,
           max_input_shape,
           opt_input_shape,
+          min_shape_tensor,
+          max_shape_tensor,
+          opt_shape_tensor,
           disable_trt_plugin_fp16,
           static_cast<phi::DataType>(Get<int>("model_precision")));
   trt_engine->SetUseOSS(Get<bool>("use_varseqlen"));
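
When trt_tuned_dynamic_shape is on, the pass fills all six maps from the same serialized range file, so shape-tensor value ranges travel through exactly the same channel as ordinary input-shape ranges. A minimal standalone sketch of that read-back, assuming an illustrative file name:

  std::map<std::string, std::vector<int>> min_shape, max_shape, opt_shape;
  std::map<std::string, std::vector<int>> min_value, max_value, opt_value;
  // Fills shape ranges for ordinary inputs and value ranges for shape
  // tensors from one file ("shape_range_info.pbtxt" is illustrative).
  inference::DeserializeShapeRangeInfo("shape_range_info.pbtxt",
                                       &min_shape, &max_shape, &opt_shape,
                                       &min_value, &max_value, &opt_value);
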
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index a298cd1817e4a708a9ecd2413e83e695e116685b..42126b5048e685cac7ae1106a925faa0b55892c6 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1747,10 +1747,39 @@ void AnalysisPredictor::CollectShapeRangeInfo() {
     if (!var->IsType<framework::LoDTensor>()) {
       continue;
     }
-    framework::DDim dim = var->Get<framework::LoDTensor>().dims();
+    auto tensor = var->Get<framework::LoDTensor>();
+    framework::DDim dim = tensor.dims();
     std::vector<int32_t> shape(dim.size());
     for (size_t i = 0; i < shape.size(); ++i) shape[i] = dim[i];
     shape_info_[name].emplace_back(shape);
+
+    // We need to collect the value range of shape tensors for Paddle-TRT's
+    // use. Note that this heuristic assumes every shape tensor in the
+    // model has at most 7 elements, so it can occasionally misclassify a
+    // small integer tensor as a shape tensor; collecting extra values for
+    // such tensors is harmless.
+    auto is_shape_tensor = tensor.numel() <= 7 && tensor.numel() >= 1;
+    if (tensor.dtype() == paddle::experimental::DataType::INT32 &&
+        is_shape_tensor) {
+      std::vector<int> int32_host(tensor.numel());
+      if (tensor.place() == platform::CPUPlace()) {
+        paddle::memory::Copy(platform::CPUPlace(),
+                             int32_host.data(),
+                             platform::CPUPlace(),
+                             tensor.data<int>(),
+                             tensor.numel() * sizeof(int));
+      } else if (tensor.place() == platform::CUDAPlace()) {
+#if defined(PADDLE_WITH_CUDA)
+        paddle::memory::Copy(platform::CPUPlace(),
+                             int32_host.data(),
+                             platform::CUDAPlace(),
+                             tensor.data<int>(),
+                             tensor.numel() * sizeof(int),
+                             nullptr);
+#endif
+      }
+      shape_tensor_value_[name].emplace_back(int32_host);
+    }
   }
 }

@@ -1758,43 +1787,61 @@ void AnalysisPredictor::StatisticShapeRangeInfo() {
   std::map<std::string, std::vector<int32_t>> min_shapes;
   std::map<std::string, std::vector<int32_t>> max_shapes;
   std::map<std::string, std::vector<int32_t>> opt_shapes;
-  for (auto it : shape_info_) {
-    auto name = it.first;
-    auto shapes = it.second;
-
-    std::vector<int32_t> min_shape(shapes[0].begin(), shapes[0].end());
-    std::vector<int32_t> max_shape(shapes[0].begin(), shapes[0].end());
-    std::vector<int32_t> opt_shape(shapes[0].begin(), shapes[0].end());
-
-    auto ShapeMaxFreq = [](const std::map<int32_t, int32_t> &m) -> int32_t {
-      std::vector<std::pair<int32_t, int32_t>> counter;
-      for (auto &it : m) counter.push_back(it);
-      std::sort(
-          counter.begin(),
-          counter.end(),
-          [](std::pair<int32_t, int32_t> &a, std::pair<int32_t, int32_t> &b) {
-            return a.second > b.second;
-          });
-      return counter[0].first;
-    };
-
-    for (size_t d = 0; d < shapes[0].size(); ++d) {
-      std::map<int32_t, int32_t> counter;
-      for (size_t i = 0; i < shapes.size(); ++i) {
-        counter[shapes[i][d]] += 1;
-        if (shapes[i][d] < min_shape[d]) min_shape[d] = shapes[i][d];
-        if (shapes[i][d] > max_shape[d]) max_shape[d] = shapes[i][d];
-      }
-      opt_shape[d] = ShapeMaxFreq(counter);
-    }
-
-    min_shapes[name] = min_shape;
-    max_shapes[name] = max_shape;
-    opt_shapes[name] = opt_shape;
-  }
+  std::map<std::string, std::vector<int32_t>> min_values;
+  std::map<std::string, std::vector<int32_t>> max_values;
+  std::map<std::string, std::vector<int32_t>> opt_values;
+
+  auto extract_min_max_opt =
+      [](std::map<std::string, std::vector<int32_t>> &min_data,
+         decltype(min_data) max_data,
+         decltype(min_data) opt_data,
+         decltype(shape_info_) shape_data) {
+        for (auto it : shape_data) {
+          auto name = it.first;
+          auto shapes = it.second;
+
+          std::vector<int32_t> min_shape(shapes[0].begin(), shapes[0].end());
+          std::vector<int32_t> max_shape(shapes[0].begin(), shapes[0].end());
+          std::vector<int32_t> opt_shape(shapes[0].begin(), shapes[0].end());
+
+          auto ShapeMaxFreq =
+              [](const std::map<int32_t, int32_t> &m) -> int32_t {
+            std::vector<std::pair<int32_t, int32_t>> counter;
+            for (auto &it : m) counter.push_back(it);
+            std::sort(counter.begin(),
+                      counter.end(),
+                      [](std::pair<int32_t, int32_t> &a,
+                         std::pair<int32_t, int32_t> &b) {
+                        return a.second > b.second;
+                      });
+            return counter[0].first;
+          };
+
+          for (size_t d = 0; d < shapes[0].size(); ++d) {
+            std::map<int32_t, int32_t> counter;
+            for (size_t i = 0; i < shapes.size(); ++i) {
+              counter[shapes[i][d]] += 1;
+              if (shapes[i][d] < min_shape[d]) min_shape[d] = shapes[i][d];
+              if (shapes[i][d] > max_shape[d]) max_shape[d] = shapes[i][d];
+            }
+            opt_shape[d] = ShapeMaxFreq(counter);
+          }

-  inference::SerializeShapeRangeInfo(
-      config_.shape_range_info_path(), min_shapes, max_shapes, opt_shapes);
+          min_data[name] = min_shape;
+          max_data[name] = max_shape;
+          opt_data[name] = opt_shape;
+        }
+      };
+  extract_min_max_opt(min_shapes, max_shapes, opt_shapes, shape_info_);
+  extract_min_max_opt(min_values, max_values, opt_values, shape_tensor_value_);
+
+  inference::SerializeShapeRangeInfo(config_.shape_range_info_path(),
+                                     min_shapes,
+                                     max_shapes,
+                                     opt_shapes,
+                                     min_values,
+                                     max_values,
+                                     opt_values);
 }

 bool AnalysisPredictor::LoadProgramDesc() {
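
For each dimension (or, for a shape tensor, each element) the statistics are: the minimum over all observations, the maximum, and the most frequent value as opt. A standalone illustration of the ShapeMaxFreq selection; MostFrequent is a hypothetical helper, not part of this patch:

  #include <cstdint>
  #include <map>
  #include <vector>

  // Most frequently observed value, mirroring the ShapeMaxFreq lambda above.
  int32_t MostFrequent(const std::vector<int32_t> &observed) {
    std::map<int32_t, int32_t> counter;
    for (int32_t v : observed) counter[v] += 1;
    int32_t best = observed.front();
    int32_t best_count = 0;
    for (const auto &kv : counter) {
      if (kv.second > best_count) {
        best = kv.first;
        best_count = kv.second;
      }
    }
    return best;
  }

  // E.g. observations {8, 8, 4, 8} for one element yield min 4, max 8, opt 8.
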
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 7b561275cbc52a0c424252c00099f217645c0188..ff34bac545d80ecb7f65f7b4b17a700113f8c2b5 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -514,6 +514,7 @@ class AnalysisPredictor : public PaddlePredictor {
   bool status_is_cloned_{false};

   std::map<std::string, std::vector<std::vector<int32_t>>> shape_info_;
+  std::map<std::string, std::vector<std::vector<int32_t>>> shape_tensor_value_;
   static int clone_num_;

   bool private_context_{false};
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 06886aa782fa7cf1087f982c73b4f8a2d58d1140..c6c0d0479c9e5fc2c9139457fee43d0206c2af0d 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -275,6 +275,36 @@ void TensorRTEngine::FreezeNetwork() {
             nvinfer1::OptProfileSelector::kOPT,
             Vec2TRT_Dims(optim_input_shape_[input.first], input.first, true));
       }
+
+      for (int input_id = 0; input_id < network()->getNbInputs(); input_id++) {
+        auto input_name = network()->getInput(input_id)->getName();
+        if (!itensor_map_.count(input_name)) continue;
+        if (!GetITensor(input_name)->isShapeTensor()) continue;
+        PADDLE_ENFORCE_EQ(min_shape_tensor_.count(input_name) &&
+                              max_shape_tensor_.count(input_name) &&
+                              optim_shape_tensor_.count(input_name),
+                          true,
+                          platform::errors::InvalidArgument(
+                              "Failed to find min/max/optim shape values for "
+                              "TRT network's shape tensor input named %s.",
+                              input_name));
+        const auto &min_vec = min_shape_tensor_.at(input_name);
+        const auto &max_vec = max_shape_tensor_.at(input_name);
+        const auto &opt_vec = optim_shape_tensor_.at(input_name);
+        optim_profiles_[i]->setShapeValues(input_name,
+                                           nvinfer1::OptProfileSelector::kMIN,
+                                           min_vec.data(),
+                                           min_vec.size());
+        optim_profiles_[i]->setShapeValues(input_name,
+                                           nvinfer1::OptProfileSelector::kMAX,
+                                           max_vec.data(),
+                                           max_vec.size());
+        optim_profiles_[i]->setShapeValues(input_name,
+                                           nvinfer1::OptProfileSelector::kOPT,
+                                           opt_vec.data(),
+                                           opt_vec.size());
+      }
+
       infer_builder_config_->addOptimizationProfile(optim_profiles_[i]);
     }
     if (WithFp16() && disable_trt_plugin_fp16()) {
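
For reference, the new loop is a direct use of TensorRT's per-profile shape-values API. A stripped-down sketch of the same calls outside Paddle, assuming a local builder/config pair and a 3-element shape-tensor input named "shape" (all names and values illustrative):

  nvinfer1::IOptimizationProfile *profile =
      builder->createOptimizationProfile();
  const int32_t min_v[3] = {1, 8, 4};
  const int32_t max_v[3] = {18, 8, 4};
  const int32_t opt_v[3] = {18, 8, 4};
  // For a shape tensor, the profile constrains element values, not dims.
  profile->setShapeValues("shape", nvinfer1::OptProfileSelector::kMIN, min_v, 3);
  profile->setShapeValues("shape", nvinfer1::OptProfileSelector::kMAX, max_v, 3);
  profile->setShapeValues("shape", nvinfer1::OptProfileSelector::kOPT, opt_v, 3);
  config->addOptimizationProfile(profile);
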
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 138f7faadf3adc77c4e527bc83cd10d8e048d421..f19b9fc505acf0aa444d3d38ef23cbb88065a715 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -217,6 +217,9 @@ class TensorRTEngine {
                  const ShapeMapType min_input_shape = {},
                  const ShapeMapType max_input_shape = {},
                  const ShapeMapType optim_input_shape = {},
+                 const ShapeMapType min_shape_tensor = {},
+                 const ShapeMapType max_shape_tensor = {},
+                 const ShapeMapType optim_shape_tensor = {},
                  bool disable_trt_plugin_fp16 = false,
                  phi::DataType model_precision = phi::DataType::FLOAT32,
                  nvinfer1::ILogger& logger = NaiveLogger::Global())
@@ -228,6 +231,9 @@ class TensorRTEngine {
         min_input_shape_(min_input_shape),
         max_input_shape_(max_input_shape),
         optim_input_shape_(optim_input_shape),
+        min_shape_tensor_(min_shape_tensor),
+        max_shape_tensor_(max_shape_tensor),
+        optim_shape_tensor_(optim_shape_tensor),
         disable_trt_plugin_fp16_(disable_trt_plugin_fp16),
         model_precision_(model_precision),
         logger_(logger) {
@@ -443,6 +449,9 @@ class TensorRTEngine {
   ShapeMapType min_input_shape() { return min_input_shape_; }
   ShapeMapType max_input_shape() { return max_input_shape_; }
   ShapeMapType optim_input_shape() { return optim_input_shape_; }
+  ShapeMapType min_shape_tensor() { return min_shape_tensor_; }
+  ShapeMapType max_shape_tensor() { return max_shape_tensor_; }
+  ShapeMapType optim_shape_tensor() { return optim_shape_tensor_; }

   bool AdjustDynamicShapeRange(const ShapeMapType& runtime_input_shape,
                                std::vector<std::string>* changed) {
@@ -641,6 +650,9 @@ class TensorRTEngine {
   ShapeMapType min_input_shape_;
   ShapeMapType max_input_shape_;
   ShapeMapType optim_input_shape_;
+  ShapeMapType min_shape_tensor_;
+  ShapeMapType max_shape_tensor_;
+  ShapeMapType optim_shape_tensor_;
   bool disable_trt_plugin_fp16_{false};
   phi::DataType model_precision_{phi::DataType::FLOAT32};
   bool use_varseqlen_{false};
@@ -741,6 +753,9 @@ class TRTEngineManager {
       const std::map<std::string, std::vector<int>> min_input_shape = {},
       const std::map<std::string, std::vector<int>> max_input_shape = {},
       const std::map<std::string, std::vector<int>> optim_input_shape = {},
+      const std::map<std::string, std::vector<int>> min_shape_tensor = {},
+      const std::map<std::string, std::vector<int>> max_shape_tensor = {},
+      const std::map<std::string, std::vector<int>> optim_shape_tensor = {},
       bool disable_trt_plugin_fp16 = false,
       phi::DataType model_precision = phi::DataType::FLOAT32,
       nvinfer1::ILogger& logger = NaiveLogger::Global()) {
@@ -752,6 +767,9 @@ class TRTEngineManager {
                                   min_input_shape,
                                   max_input_shape,
                                   optim_input_shape,
+                                  min_shape_tensor,
+                                  max_shape_tensor,
+                                  optim_shape_tensor,
                                   disable_trt_plugin_fp16,
                                   model_precision,
                                   logger);
diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
index 6a253d2815941efb16e52676513be9a3ccfb32a4..ddf0ec5dd335e11ca12c5117750ae8cdf9cc6012 100644
--- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
+++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
@@ -31,6 +31,136 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {

+class TensorRTDynamicShapeValueEngineTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    ctx_ = new phi::GPUContext(platform::CUDAPlace(0));
+    ctx_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                           .GetAllocator(platform::CUDAPlace(0), ctx_->stream())
+                           .get());
+    ctx_->SetHostAllocator(
+        paddle::memory::allocation::AllocatorFacade::Instance()
+            .GetAllocator(paddle::platform::CPUPlace())
+            .get());
+    ctx_->SetZeroAllocator(
+        paddle::memory::allocation::AllocatorFacade::Instance()
+            .GetZeroAllocator(platform::CUDAPlace(0))
+            .get());
+    ctx_->SetPinnedAllocator(
+        paddle::memory::allocation::AllocatorFacade::Instance()
+            .GetAllocator(paddle::platform::CUDAPinnedPlace())
+            .get());
+    ctx_->PartialInitWithAllocator();
+
+    std::map<std::string, std::vector<int>> min_input_shape = {
+        {"input", {1, 32}}};
+    std::map<std::string, std::vector<int>> max_input_shape = {
+        {"input", {18, 32}}};
+    std::map<std::string, std::vector<int>> optim_input_shape = {
+        {"input", {18, 32}}};
+    std::map<std::string, std::vector<int>> min_input_value = {
+        {"shape", {1, 8, 4}}};
+    std::map<std::string, std::vector<int>> max_input_value = {
+        {"shape", {18, 8, 4}}};
+    std::map<std::string, std::vector<int>> optim_input_value = {
+        {"shape", {18, 8, 4}}};
+    engine_ = new TensorRTEngine(16,
+                                 1 << 10,
+                                 AnalysisConfig::Precision::kFloat32,
+                                 nullptr,
+                                 0,
+                                 min_input_shape,
+                                 max_input_shape,
+                                 optim_input_shape,
+                                 min_input_value,
+                                 max_input_value,
+                                 optim_input_value,
+                                 false,
+                                 phi::DataType::FLOAT32,
+                                 NaiveLogger::Global());
+    engine_->InitNetwork();
+  }
+
+  void TearDown() override {
+    if (engine_) {
+      delete engine_;
+      engine_ = nullptr;
+    }
+  }
+
+  void PrepareInputOutput(const std::vector<float> &input,
+                          std::vector<int> output_shape) {
+    paddle::framework::TensorFromVector(input, *ctx_, &input_);
+    output_.Resize(phi::make_ddim(output_shape));
+  }
+  void PrepareShapeInput(const std::vector<int> &input) {
+    paddle::framework::TensorFromVector(input, *ctx_, &shape_);
+  }
+  void GetOutput(std::vector<float> *output) {
+    paddle::framework::TensorToVector(output_, *ctx_, output);
+  }
+
+ protected:
+  framework::LoDTensor input_;
+  framework::LoDTensor shape_;
+  framework::LoDTensor output_;
+  TensorRTEngine *engine_;
+  phi::GPUContext *ctx_;
+};
+
+TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
+  std::vector<void *> buffers(3);
+  std::cout << "with_dynamic_shape: " << engine_->with_dynamic_shape()
+            << std::endl;
+  auto *x = engine_->DeclareInput(
+      "input", nvinfer1::DataType::kFLOAT, nvinfer1::Dims2{-1, 32});
+  nvinfer1::Dims shape_dim;
+  shape_dim.nbDims = 1;
+  shape_dim.d[0] = 3;
+  auto *shape =
+      engine_->DeclareInput("shape", nvinfer1::DataType::kINT32, shape_dim);
+  auto layer = engine_->network()->addShuffle(*x);
+  layer->setInput(1, *shape);
+  PADDLE_ENFORCE_NOT_NULL(
+      layer,
+      platform::errors::InvalidArgument("TRT shuffle layer building failed."));
+  engine_->DeclareOutput(layer, 0, "y");
+  engine_->FreezeNetwork();
+  ASSERT_EQ(engine_->engine()->getNbBindings(), 3);
+
+  std::vector<float> x_v(8 * 32);
+  for (int i = 0; i < 8 * 32; i++) {
+    x_v[i] = i % (8 * 32);
+  }
+
+  std::vector<int> shape_v = {8, 8, 4};
+  PrepareInputOutput(x_v, {8, 8, 4});
+  PrepareShapeInput(shape_v);
+  engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32});
+  engine_->context()->setBindingDimensions(1, shape_dim);
+  engine_->context()->setInputShapeBinding(1, shape_v.data());
+
+  auto *x_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
+  auto *shape_gpu_data = shape_.mutable_data<int>(ctx_->GetPlace());
+  auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());
+
+  buffers[0] = reinterpret_cast<void *>(x_gpu_data);
+  buffers[1] = reinterpret_cast<void *>(shape_gpu_data);
+  buffers[2] = reinterpret_cast<void *>(y_gpu_data);
+
+  engine_->Execute(-1, &buffers, ctx_->stream());
+  cudaStreamSynchronize(ctx_->stream());
+  std::vector<float> y_cpu;
+  GetOutput(&y_cpu);
+  ASSERT_EQ(y_cpu[0], 0);
+  ASSERT_EQ(y_cpu[1], 1);
+  auto dims = engine_->context()->getBindingDimensions(2);
+  ASSERT_EQ(dims.nbDims, 3);
+  ASSERT_EQ(dims.d[0], 8);
+  ASSERT_EQ(dims.d[1], 8);
+  ASSERT_EQ(dims.d[2], 4);
+}
+
 class TensorRTDynamicEngineTest : public ::testing::Test {
  protected:
   void SetUp() override {
@@ -67,6 +198,9 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
                                  min_input_shape,
                                  max_input_shape,
                                  optim_input_shape,
+                                 std::map<std::string, std::vector<int>>(),
+                                 std::map<std::string, std::vector<int>>(),
+                                 std::map<std::string, std::vector<int>>(),
                                  false,
                                  phi::DataType::FLOAT32,
                                  NaiveLogger::Global());
@@ -241,6 +375,9 @@ class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test {
                                  min_input_shape,
                                  max_input_shape,
                                  optim_input_shape,
+                                 std::map<std::string, std::vector<int>>(),
+                                 std::map<std::string, std::vector<int>>(),
+                                 std::map<std::string, std::vector<int>>(),
                                  false,
                                  phi::DataType::FLOAT32,
                                  NaiveLogger::Global());
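
The test also documents the runtime contract under the pre-8.5 binding-index API: a shape-tensor input needs both its binding dimensions and its element values before execution is legal. Condensed, with context and shape_dim as in the test above:

  // Binding index 1 is the "shape" input declared in the test.
  int32_t shape_vals[3] = {8, 8, 4};
  context->setBindingDimensions(1, shape_dim);   // dims of the 1-D tensor
  context->setInputShapeBinding(1, shape_vals);  // its element values
  // enqueueV2 is only valid once all dynamic inputs are fully specified:
  assert(context->allInputShapesSpecified());
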
diff --git a/paddle/fluid/inference/utils/io_utils.cc b/paddle/fluid/inference/utils/io_utils.cc
index e45e3a1035fe5d6d9b0701808644f09b2ababa70..ef342a4eefce3dbb7846c74ace894cabb508694b 100644
--- a/paddle/fluid/inference/utils/io_utils.cc
+++ b/paddle/fluid/inference/utils/io_utils.cc
@@ -182,7 +182,10 @@ void SerializeShapeRangeInfo(
     const std::string &path,
     const std::map<std::string, std::vector<int32_t>> &min_shape,
     const std::map<std::string, std::vector<int32_t>> &max_shape,
-    const std::map<std::string, std::vector<int32_t>> &opt_shape) {
+    const std::map<std::string, std::vector<int32_t>> &opt_shape,
+    const std::map<std::string, std::vector<int32_t>> &min_value,
+    const std::map<std::string, std::vector<int32_t>> &max_value,
+    const std::map<std::string, std::vector<int32_t>> &opt_value) {
   paddle::inference::proto::ShapeRangeInfos shape_range_infos;
   for (auto it : min_shape) {
     auto *s = shape_range_infos.add_shape_range_info();
@@ -192,10 +195,18 @@ void SerializeShapeRangeInfo(
       s->add_max_shape(max_shape.at(it.first)[i]);
       s->add_opt_shape(opt_shape.at(it.first)[i]);
     }
+    // If it.first names a shape tensor, also record its observed values.
+    if (min_value.count(it.first)) {
+      for (size_t i = 0; i < min_value.at(it.first).size(); ++i) {
+        s->add_min_value(min_value.at(it.first)[i]);
+        s->add_max_value(max_value.at(it.first)[i]);
+        s->add_opt_value(opt_value.at(it.first)[i]);
+      }
+    }
   }
-
   inference::SerializeShapeRangeInfo(path, shape_range_infos);
 }
+
 void DeserializeShapeRangeInfo(
     const std::string &path, paddle::inference::proto::ShapeRangeInfos *info) {
   int fd = open(path.c_str(), O_RDONLY);
@@ -213,7 +224,10 @@ void DeserializeShapeRangeInfo(
     const std::string &path,
     std::map<std::string, std::vector<int32_t>> *min_shape,
     std::map<std::string, std::vector<int32_t>> *max_shape,
-    std::map<std::string, std::vector<int32_t>> *opt_shape) {
+    std::map<std::string, std::vector<int32_t>> *opt_shape,
+    std::map<std::string, std::vector<int32_t>> *min_value,
+    std::map<std::string, std::vector<int32_t>> *max_value,
+    std::map<std::string, std::vector<int32_t>> *opt_value) {
   paddle::inference::proto::ShapeRangeInfos shape_range_infos;
   DeserializeShapeRangeInfo(path, &shape_range_infos);
   for (int i = 0; i < shape_range_infos.shape_range_info_size(); ++i) {
@@ -236,6 +250,26 @@ void DeserializeShapeRangeInfo(
       opt_shape->insert(std::make_pair(name, tmp));
     }
   }
+  for (int i = 0; i < shape_range_infos.shape_range_info_size(); ++i) {
+    auto info = shape_range_infos.shape_range_info(i);
+    auto name = info.name();
+    if (min_value->count(name) || max_value->count(name) ||
+        opt_value->count(name)) {
+      continue;
+    } else {
+      std::vector<int32_t> tmp(info.min_value_size());
+      for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.min_value(k);
+      min_value->insert(std::make_pair(name, tmp));
+
+      tmp.resize(info.max_value_size());
+      for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.max_value(k);
+      max_value->insert(std::make_pair(name, tmp));
+
+      tmp.resize(info.opt_value_size());
+      for (size_t k = 0; k < tmp.size(); ++k) tmp[k] = info.opt_value(k);
+      opt_value->insert(std::make_pair(name, tmp));
+    }
+  }
 }

 void UpdateShapeRangeInfo(
@@ -264,6 +298,7 @@ void UpdateShapeRangeInfo(
       }
     }
   }
+
   inference::SerializeShapeRangeInfo(path, shape_range_infos);
 }
diff --git a/paddle/fluid/inference/utils/io_utils.h b/paddle/fluid/inference/utils/io_utils.h
index 682bbdef05edcca79456b3e134d23caab73697a8..64d6b3be4d94cb36593389c2b5df5650ff298083 100644
--- a/paddle/fluid/inference/utils/io_utils.h
+++ b/paddle/fluid/inference/utils/io_utils.h
@@ -42,23 +42,22 @@ void SerializePDTensorsToFile(const std::string& path,
                               const std::vector<PaddleTensor>& tensors);
 void DeserializePDTensorsToFile(const std::string& path,
                                 std::vector<PaddleTensor>* tensors);
-
-void SerializeShapeRangeInfo(
-    const std::string& path,
-    const paddle::inference::proto::ShapeRangeInfos& info);
 void SerializeShapeRangeInfo(
     const std::string& path,
     const std::map<std::string, std::vector<int32_t>>& min_shape,
     const std::map<std::string, std::vector<int32_t>>& max_shape,
-    const std::map<std::string, std::vector<int32_t>>& opt_shape);
-void DeserializeShapeRangeInfo(const std::string& path,
-                               paddle::inference::proto::ShapeRangeInfos* info);
+    const std::map<std::string, std::vector<int32_t>>& opt_shape,
+    const std::map<std::string, std::vector<int32_t>>& min_value,
+    const std::map<std::string, std::vector<int32_t>>& max_value,
+    const std::map<std::string, std::vector<int32_t>>& opt_value);
 void DeserializeShapeRangeInfo(
     const std::string& path,
     std::map<std::string, std::vector<int32_t>>* min_shape,
     std::map<std::string, std::vector<int32_t>>* max_shape,
-    std::map<std::string, std::vector<int32_t>>* opt_shape);
-
+    std::map<std::string, std::vector<int32_t>>* opt_shape,
+    std::map<std::string, std::vector<int32_t>>* min_value,
+    std::map<std::string, std::vector<int32_t>>* max_value,
+    std::map<std::string, std::vector<int32_t>>* opt_value);
 void UpdateShapeRangeInfo(
     const std::string& path,
     const std::map<std::string, std::vector<int32_t>>& min_shape,
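
Assuming the range file is written in protobuf text format (the human-readable .pbtxt that Paddle's shape-range tooling produces), a record for a 3-element shape tensor would look roughly like this, with illustrative values:

  shape_range_info {
    name: "shape"
    min_shape: 3   # the tensor itself is 1-D with 3 elements
    max_shape: 3
    opt_shape: 3
    min_value: 1
    min_value: 8
    min_value: 4
    max_value: 18
    max_value: 8
    max_value: 4
    opt_value: 18
    opt_value: 8
    opt_value: 4
  }
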
diff --git a/paddle/fluid/inference/utils/io_utils_tester.cc b/paddle/fluid/inference/utils/io_utils_tester.cc
index 7707140fb9762efaab1308fc0ba64ee04f67c0b9..812c22aa67d2eef8b3e7989461212a167ac52ab7 100644
--- a/paddle/fluid/inference/utils/io_utils_tester.cc
+++ b/paddle/fluid/inference/utils/io_utils_tester.cc
@@ -100,28 +100,48 @@ TEST(infer_io_utils, tensors) {

 TEST(shape_info_io, read_and_write) {
   const std::string path = "test_shape_info_io";
   std::map<std::string, std::vector<int32_t>> min_shape, max_shape, opt_shape;
+  std::map<std::string, std::vector<int32_t>> min_value, max_value, opt_value;
   min_shape.insert(
       std::make_pair("test1", std::vector<int32_t>{1, 3, 112, 112}));
   max_shape.insert(
       std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
   opt_shape.insert(
       std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
+  min_value.insert(
+      std::make_pair("test1", std::vector<int32_t>{1, 3, 112, 112}));
+  max_value.insert(
+      std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
+  opt_value.insert(
+      std::make_pair("test1", std::vector<int32_t>{1, 3, 224, 224}));
   paddle::inference::SerializeShapeRangeInfo(
-      path, min_shape, max_shape, opt_shape);
+      path, min_shape, max_shape, opt_shape, min_value, max_value, opt_value);
   min_shape.clear();
   max_shape.clear();
   opt_shape.clear();
+  min_value.clear();
+  max_value.clear();
+  opt_value.clear();
   opt_shape.insert(
       std::make_pair("test2", std::vector<int32_t>{1, 3, 224, 224}));
-  paddle::inference::DeserializeShapeRangeInfo(
-      path, &min_shape, &max_shape, &opt_shape);
+  paddle::inference::DeserializeShapeRangeInfo(path,
+                                               &min_shape,
+                                               &max_shape,
+                                               &opt_shape,
+                                               &min_value,
+                                               &max_value,
+                                               &opt_value);
   min_shape.insert(std::make_pair("test1", std::vector<int32_t>{1, 3, 56, 56}));
   std::vector<std::string> names{"test1"};
   paddle::inference::UpdateShapeRangeInfo(
       path, min_shape, max_shape, opt_shape, names);

-  ASSERT_THROW(paddle::inference::DeserializeShapeRangeInfo(
-                   "no_exists_file", &min_shape, &max_shape, &opt_shape);
+  ASSERT_THROW(paddle::inference::DeserializeShapeRangeInfo("no_exists_file",
+                                                            &min_shape,
+                                                            &max_shape,
+                                                            &opt_shape,
+                                                            &min_value,
+                                                            &max_value,
+                                                            &opt_value);
                , paddle::platform::EnforceNotMet);
 }
diff --git a/paddle/fluid/inference/utils/shape_range_info.proto b/paddle/fluid/inference/utils/shape_range_info.proto
index fcb2d635b52261ed810c18536ec8ad22dd88d575..53f018cb5934899f537a54e8e285b0079297b4b0 100644
--- a/paddle/fluid/inference/utils/shape_range_info.proto
+++ b/paddle/fluid/inference/utils/shape_range_info.proto
@@ -23,6 +23,9 @@ message ShapeRangeInfos {
     repeated int32 min_shape = 2;
     repeated int32 max_shape = 3;
     repeated int32 opt_shape = 4;
+    repeated int32 min_value = 5;
+    repeated int32 max_value = 6;
+    repeated int32 opt_value = 7;
   }

   repeated ShapeRangeInfo shape_range_info = 1;
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
index 2acb33def7d9ed4071ae6452fc9b6a4dccfc3270..178c0fc22a57a91cd163f6005f5ac7498b69cc8a 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -554,6 +554,18 @@ class TensorRTEngineOp : public framework::OperatorBase {
 #if IS_TRT_VERSION_GE(6000)
         trt_context->setBindingDimensions(
             bind_index, inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
+        // If x is a shape tensor, we also need to call setInputShapeBinding.
+        if (engine->engine()->isShapeBinding(bind_index) &&
+            engine->engine()->bindingIsInput(bind_index)) {
+          std::vector<int> shape_v(t.numel());
+          paddle::memory::Copy(platform::CPUPlace(),
+                               shape_v.data(),
+                               platform::CUDAPlace(),
+                               t.data<int32_t>(),
+                               t.numel() * sizeof(int),
+                               nullptr);
+          trt_context->setInputShapeBinding(bind_index, shape_v.data());
+        }
 #endif
       }
       runtime_batch = t_shape[0];
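
Taken together, the patch completes the tuned-dynamic-shape workflow for models whose reshapes are driven by shape tensors. A usage sketch with Paddle's existing AnalysisConfig API; the file name is illustrative:

  // Pass 1: run representative inputs once, recording tensor shapes and
  // shape-tensor values into the range-info file.
  paddle::AnalysisConfig collect_config;
  // ... set model/params paths, EnableUseGpu(...), etc. ...
  collect_config.CollectShapeRangeInfo("shape_range_info.pbtxt");
  // Run the predictor over calibration inputs here.

  // Pass 2: build the TensorRT engine from the tuned ranges; the second
  // argument permits rebuilding at runtime for out-of-range inputs.
  paddle::AnalysisConfig deploy_config;
  deploy_config.EnableTunedTensorRtDynamicShape("shape_range_info.pbtxt", true);
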