fix: transform the data from CPU to GPU when TRT is used (#37427) (#38745)

Co-authored-by: N feng_shuai <fengshuai03@baidu.com>

fix: transform the data from CPU to GPU when TRT is used (#37427) (#38745)
Co-authored-by: N feng_shuai <fengshuai03@baidu.com>
a6b1c4c1 · Wilber · GitHub · 1e8432f2 · a6b1c4c1
隐藏空白更改
内联并排

Showing 1 changed file with 8 additions and 0 deletions

paddle/fluid/operators/tensorrt/tensorrt_engine_op.h paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +8 -0

未找到文件。
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -24,6 +24,7 @@
 #include <utility>
 #include <vector>

+#include "paddle/fluid/framework/data_device_transform.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
@@ -421,6 +422,13 @@ class TensorRTEngineOp : public framework::OperatorBase {
      // convert input and copy to TRT engine's buffer
      auto &t =
          inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
+      // check the input_tensor
+      if (!platform::is_gpu_place(t.place())) {
+        framework::Tensor out;
+        platform::CUDAPlace dst_place;
+        framework::TransDataDevice(t, dst_place, &out);
+        t.ShareDataWith(out);
+      }
      auto t_shape = framework::vectorize<int64_t>(t.dims());
      const int bind_index = engine->engine()->getBindingIndex(x.c_str());
      PADDLE_ENFORCE_LT(