提交 4f5f0be7 编写于 作者: L Luo Tao

Use the latest Buffer struct to update the converter

上级 a3ba264c
...@@ -59,8 +59,8 @@ class EngineBase { ...@@ -59,8 +59,8 @@ class EngineBase {
struct Buffer { struct Buffer {
void* buffer{nullptr}; // buffer should be allocated only once. void* buffer{nullptr}; // buffer should be allocated only once.
int max_size; // buffer allocated space. size_t max_size; // buffer allocated space.
int size; // data size. size_t size; // data size.
DeviceType device{DeviceType::UNK}; // tells which device this buffer is on. DeviceType device{DeviceType::UNK}; // tells which device this buffer is on.
}; };
......
nv_library(tensorrt_engine SRCS engine.cc DEPS framework_proto) nv_library(tensorrt_engine SRCS engine.cc DEPS framework_proto)
nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine) nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine)
add_subdirectory(convert) add_subdirectory(convert)
nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES}) nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES})
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine) DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine)
nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor) nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
...@@ -58,7 +58,7 @@ class DefaultIOConverter : public EngineIOConverter { ...@@ -58,7 +58,7 @@ class DefaultIOConverter : public EngineIOConverter {
cudaMemcpyDeviceToHost, *stream_)); cudaMemcpyDeviceToHost, *stream_));
} else if (is_gpu_place(place)) { } else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size, PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyHostToHost, *stream_)); cudaMemcpyDeviceToDevice, *stream_));
} else { } else {
PADDLE_THROW("Unknown device for converter"); PADDLE_THROW("Unknown device for converter");
} }
...@@ -66,6 +66,7 @@ class DefaultIOConverter : public EngineIOConverter { ...@@ -66,6 +66,7 @@ class DefaultIOConverter : public EngineIOConverter {
} }
}; };
// fluid LoDTensor <-> tensorrt ITensor
REGISTER_TENSORRT_IO_CONVERTER(default, DefaultIOConverter); REGISTER_TENSORRT_IO_CONVERTER(default, DefaultIOConverter);
} // namespace tensorrt } // namespace tensorrt
......
...@@ -74,13 +74,13 @@ void Compare(const std::string op_type, float input, float expect) { ...@@ -74,13 +74,13 @@ void Compare(const std::string op_type, float input, float expect) {
// convert LoDTensor to ITensor // convert LoDTensor to ITensor
size_t size = x_tensor->memory_size(); size_t size = x_tensor->memory_size();
EngineIOConverter::ConvertInput(op_type, *x_tensor, engine->buffer("X"), size, EngineIOConverter::ConvertInput(op_type, *x_tensor,
&stream); engine->buffer("X").buffer, size, &stream);
// run tensorrt Output // run tensorrt Output
engine->Execute(1); engine->Execute(1);
// convert ITensor to LoDTensor // convert ITensor to LoDTensor
EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out"), out_tensor, EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out").buffer,
size, &stream); out_tensor, size, &stream);
// get tensorrt output // get tensorrt output
std::vector<float> out2; std::vector<float> out2;
framework::TensorToVector(*out_tensor, ctx, &out2); framework::TensorToVector(*out_tensor, ctx, &out2);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册