提交 4f5f0be7 编写于 作者: L Luo Tao

use the latest buffer to update the convert

上级 a3ba264c
......@@ -59,8 +59,8 @@ class EngineBase {
// Describes one engine buffer: the raw pointer, the allocated space, the size
// of the data it holds, and the device it is on.
// NOTE(review): this listing appears to be a diff hunk with its +/- markers
// stripped, so the `int` and `size_t` declarations of max_size/size below are
// presumably the before/after versions of the same two members, not four
// distinct fields — confirm against the commit.
struct Buffer {
void* buffer{nullptr}; // buffer should be allocated only once.
int max_size; // buffer allocated space.
int size; // data size.
size_t max_size; // buffer allocated space.
size_t size; // data size.
DeviceType device{DeviceType::UNK}; // tells which device this buffer is on.
};
......
nv_library(tensorrt_engine SRCS engine.cc DEPS framework_proto)
nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine)
add_subdirectory(convert)
nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES})
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine)
nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
......@@ -58,7 +58,7 @@ class DefaultIOConverter : public EngineIOConverter {
cudaMemcpyDeviceToHost, *stream_));
} else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyHostToHost, *stream_));
cudaMemcpyDeviceToDevice, *stream_));
} else {
PADDLE_THROW("Unknown device for converter");
}
......@@ -66,6 +66,7 @@ class DefaultIOConverter : public EngineIOConverter {
}
};
// fluid LodTensor <-> tensorrt ITensor
REGISTER_TENSORRT_IO_CONVERTER(default, DefaultIOConverter);
} // namespace tensorrt
......
......@@ -74,13 +74,13 @@ void Compare(const std::string op_type, float input, float expect) {
// convert LoDTensor to ITensor
size_t size = x_tensor->memory_size();
EngineIOConverter::ConvertInput(op_type, *x_tensor, engine->buffer("X"), size,
&stream);
EngineIOConverter::ConvertInput(op_type, *x_tensor,
engine->buffer("X").buffer, size, &stream);
// run the tensorrt engine
engine->Execute(1);
// convert ITensor to LoDTensor
EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out"), out_tensor,
size, &stream);
EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out").buffer,
out_tensor, size, &stream);
// get tensorrt output
std::vector<float> out2;
framework::TensorToVector(*out_tensor, ctx, &out2);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册