未验证 提交 a6b1c4c1 编写于 作者: W Wilber 提交者: GitHub

fix: transform the input data from CPU to GPU when TensorRT is used (#37427) (#38745)

Co-authored-by: Nfeng_shuai <fengshuai03@baidu.com>
上级 1e8432f2
......@@ -24,6 +24,7 @@
#include <utility>
#include <vector>
#include "paddle/fluid/framework/data_device_transform.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
......@@ -421,6 +422,13 @@ class TensorRTEngineOp : public framework::OperatorBase {
// convert input and copy to TRT engine's buffer
auto &t =
inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
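// If the input tensor is not on a GPU place, transform it to a CUDAPlace and
// make `t` share the transformed data before it is bound to the TRT engine.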
if (!platform::is_gpu_place(t.place())) {
framework::Tensor out;
platform::CUDAPlace dst_place;
framework::TransDataDevice(t, dst_place, &out);
t.ShareDataWith(out);
}
auto t_shape = framework::vectorize<int64_t>(t.dims());
const int bind_index = engine->engine()->getBindingIndex(x.c_str());
PADDLE_ENFORCE_LT(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册