From 4f5f0be769228a61bb91baf63faa6ca3a71382d0 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Mon, 14 May 2018 21:19:34 +0800
Subject: [PATCH] use the latest buffer to update the convert

The converter tests now pass the raw pointer (Buffer::buffer) instead of
the whole Buffer struct, Buffer's size fields become size_t, and the
GPU-to-GPU copy in DefaultIOConverter uses cudaMemcpyDeviceToDevice
instead of the incorrect cudaMemcpyHostToHost.

---
 paddle/fluid/inference/engine.h                          | 4 ++--
 paddle/fluid/inference/tensorrt/CMakeLists.txt           | 1 -
 paddle/fluid/inference/tensorrt/convert/CMakeLists.txt   | 2 +-
 paddle/fluid/inference/tensorrt/convert/io_converter.cc  | 3 ++-
 .../inference/tensorrt/convert/test_activation_op.cc     | 8 ++++----
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/paddle/fluid/inference/engine.h b/paddle/fluid/inference/engine.h
index c0e5f3b201b..ce2b8161715 100644
--- a/paddle/fluid/inference/engine.h
+++ b/paddle/fluid/inference/engine.h
@@ -59,8 +59,8 @@ class EngineBase {
 
 struct Buffer {
   void* buffer{nullptr};  // buffer should be allocated only once.
-  int max_size;           // buffer allocated space.
-  int size;               // data size.
+  size_t max_size;        // buffer allocated space.
+  size_t size;            // data size.
   DeviceType device{DeviceType::UNK};  // tells which device this buffer is on.
 };
 
diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt
index 677b3e04af8..b52d083f280 100644
--- a/paddle/fluid/inference/tensorrt/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt
@@ -1,5 +1,4 @@
 nv_library(tensorrt_engine SRCS engine.cc DEPS framework_proto)
 nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
 nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine)
-
 add_subdirectory(convert)
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 5178c54c084..4fb4511d991 100644
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -1,4 +1,4 @@
 nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES})
-nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
+nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
     DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine)
 nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.cc b/paddle/fluid/inference/tensorrt/convert/io_converter.cc
index 13bc2b37595..2d583c00123 100644
--- a/paddle/fluid/inference/tensorrt/convert/io_converter.cc
+++ b/paddle/fluid/inference/tensorrt/convert/io_converter.cc
@@ -58,7 +58,7 @@ class DefaultIOConverter : public EngineIOConverter {
                                            cudaMemcpyDeviceToHost, *stream_));
     } else if (is_gpu_place(place)) {
       PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
-                                           cudaMemcpyHostToHost, *stream_));
+                                           cudaMemcpyDeviceToDevice, *stream_));
     } else {
       PADDLE_THROW("Unknown device for converter");
     }
@@ -66,6 +66,7 @@ class DefaultIOConverter : public EngineIOConverter {
   }
 };
 
+// fluid LoDTensor <-> tensorrt ITensor
 REGISTER_TENSORRT_IO_CONVERTER(default, DefaultIOConverter);
 
 }  // namespace tensorrt
diff --git a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
index c43f7202127..3275b65bc6d 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
@@ -74,13 +74,13 @@ void Compare(const std::string op_type, float input,
                      float expect) {
   // convert LoDTensor to ITensor
   size_t size = x_tensor->memory_size();
-  EngineIOConverter::ConvertInput(op_type, *x_tensor, engine->buffer("X"), size,
-                                  &stream);
+  EngineIOConverter::ConvertInput(op_type, *x_tensor,
+                                  engine->buffer("X").buffer, size, &stream);
   // run tensorrt Outp
   engine->Execute(1);
   // convert ITensor to LoDTensor
-  EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out"), out_tensor,
-                                   size, &stream);
+  EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out").buffer,
+                                   out_tensor, size, &stream);
   // get tensorrt output
   std::vector<float> out2;
   framework::TensorToVector(*out_tensor, ctx, &out2);
--
GitLab
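
Note: the sketch below is a minimal, self-contained illustration of the calling convention this patch moves to. MockEngine and ConvertInput are hypothetical stand-ins, not Paddle's real TensorRTEngine or EngineIOConverter; only the Buffer struct mirrors the patched engine.h.

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <unordered_map>

    enum class DeviceType { UNK, CPU, GPU };

    // Mirrors Buffer as patched in paddle/fluid/inference/engine.h:
    // sizes are size_t, and the raw pointer lives in the `buffer` member.
    struct Buffer {
      void* buffer{nullptr};               // allocated only once
      size_t max_size{0};                  // allocated space
      size_t size{0};                      // data size
      DeviceType device{DeviceType::UNK};  // which device the buffer is on
    };

    // Hypothetical stand-in for the engine: hands out named buffers.
    class MockEngine {
     public:
      Buffer& buffer(const std::string& name) { return buffers_[name]; }

     private:
      std::unordered_map<std::string, Buffer> buffers_;
    };

    // Hypothetical converter entry point. Like the patched call sites, it
    // takes the raw pointer (Buffer::buffer), not the Buffer struct itself.
    void ConvertInput(void* engine_buffer, size_t size) {
      std::cout << "would copy " << size << " bytes into " << engine_buffer
                << "\n";
    }

    int main() {
      MockEngine engine;
      float data[4] = {1.f, 2.f, 3.f, 4.f};

      Buffer& in = engine.buffer("X");
      in.buffer = data;  // in Paddle this would be a device allocation
      in.size = sizeof(data);

      // The patched pattern: dereference .buffer at the call site, e.g.
      // engine->buffer("X").buffer in test_activation_op.cc above.
      ConvertInput(engine.buffer("X").buffer, engine.buffer("X").size);
      return 0;
    }

Dereferencing .buffer at each call site means the converter always receives the engine's current allocation rather than a copy of stale bookkeeping, which is presumably the "latest buffer" the subject line refers to.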