diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index c38088a2b800218b4afe03d341499edc7913ea39..e6cea1b46f9e299f731a3c633c81146158cb72c1 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -674,8 +674,39 @@ void Tensor::ORTCopyFromCpu(const T *data) {
                               OrtMemTypeDefault);
   size_t size = std::accumulate(begin(shape_), end(shape_), 1UL,
                                 std::multiplies<size_t>());
-  auto ort_value = GetOrtVaule(memory_info, const_cast<T *>(data), size,
-                               shape_.data(), shape_.size());
+  size_t buffer_size = size * sizeof(T);
+  if (buffer_size > buffer_.size()) {
+    buffer_.resize(buffer_size);
+  }
+  std::memcpy(static_cast<void *>(buffer_.data()), data, buffer_size);
+
+  auto onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+  if (std::is_same<T, float>::value) {
+    onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
+  } else if (std::is_same<T, double>::value) {
+    onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE;
+  } else if (std::is_same<T, int64_t>::value) {
+    onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
+  } else if (std::is_same<T, int32_t>::value) {
+    onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
+  } else if (std::is_same<T, uint8_t>::value) {
+    onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8;
+  } else if (std::is_same<T, int8_t>::value) {
+    onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8;
+  } else if (std::is_same<T, float16>::value) {
+    onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16;
+  }
+
+  if (onnx_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED) {
+    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+        "Found undefined data type for onnxruntime, only supports "
+        "float16/float32/float64/int8/uint8/int32/int64."));
+  }
+
+  auto ort_value =
+      Ort::Value::CreateTensor(memory_info, buffer_.data(), buffer_size,
+                               shape_.data(), shape_.size(), onnx_dtype);
+
   binding->BindInput(name_.c_str(), ort_value);
 }
 
diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h
index 2afe2d32e2f60e47136b1e2f002b0e98c9b17cd2..2ae5ac5e6d336a5fcd884d7cf0f8cb755fd7ae96 100644
--- a/paddle/fluid/inference/api/paddle_tensor.h
+++ b/paddle/fluid/inference/api/paddle_tensor.h
@@ -183,6 +183,7 @@ class PD_INFER_DECL Tensor {
 #ifdef PADDLE_WITH_ONNXRUNTIME
   bool is_ort_tensor_{false};
   std::vector<int64_t> shape_;
+  std::vector<int8_t> buffer_;
   std::weak_ptr<Ort::IoBinding> binding_;
   int idx_{-1};