未验证 提交 c333af2f 编写于 作者: Z Zhang Jun 提交者: GitHub

[inference] CPU -> GPU async io copy for TensorRT using ShareExternalData API (#46636)

上级 0c789ae5
@@ -501,8 +501,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
       // check the input_tensor
       if (!platform::is_gpu_place(t.place())) {
         phi::DenseTensor out;
-        platform::CUDAPlace dst_place;
-        framework::TransDataDevice(t, dst_place, &out);
+        framework::TensorCopy(t, dev_place, dev_ctx, &out);
         t.ShareDataWith(out);
       }
       auto t_shape = phi::vectorize<int64_t>(t.dims());
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册