diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 682614742cf1bd3130c638020a2545e16226d4d6..4158d0528a1aea52c2a3f0880fe1000183a9df53 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -92,6 +92,9 @@ if(WITH_GPU) if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7) message(FATAL_ERROR "TensorRT needs CUDNN >= 7.0 to compile") endif() + if(${TENSORRT_MAJOR_VERSION} VERSION_LESS 4) + message(FATAL_ERROR "Paddle needs TensorRT >= 4.0 to compile") + endif() include_directories(${TENSORRT_INCLUDE_DIR}) endif() elseif(WITH_AMD_GPU) diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 0dd0e5c9a2b08e406bf500f40e2fc8926012ac0e..748f5a084e8c880df215a60fe51c835ba5cd3110 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,9 +1,4 @@ # Add TRT tests -# This test is not stable -# See https://paddleci.ngrok.io/viewLog.html?tab=buildLog&buildTypeId=Paddle_PrCi2&buildId=36834&_focus=8828 -#nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc -# DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine -# SERIAL) nv_library(tensorrt_converter SRCS mul_op.cc conv2d_op.cc fc_op.cc DEPS tensorrt_engine mul_op) @@ -16,3 +11,5 @@ nv_test(test_trt_mul_op SRCS test_mul_op.cc mul_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL) nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL) +nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc + DEPS ${FLUID_CORE_MODULES} tensorrt_engine activation_op SERIAL) diff --git a/paddle/fluid/inference/tensorrt/convert/activation_op.cc b/paddle/fluid/inference/tensorrt/convert/activation_op.cc index 7facf30d781a26c2c6eb0a8966ef1b87e5dfdf0b..e1cace9cc1b06f036f52e82b7b86c99a02d50f50 100644 --- a/paddle/fluid/inference/tensorrt/convert/activation_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/activation_op.cc @@ -22,7 +22,8 @@ namespace tensorrt { class ReluOpConverter : public OpConverter { public: ReluOpConverter() {} - void operator()(const framework::proto::OpDesc& op) override { + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { // Here the two nullptr looks strange, that's because the // framework::OpDesc's constructor is strange. framework::OpDesc op_desc(op, nullptr); @@ -33,7 +34,12 @@ class ReluOpConverter : public OpConverter { nvinfer1::IActivationLayer* layer = TRT_ENGINE_ADD_LAYER( engine_, Activation, *const_cast(input_tensor), nvinfer1::ActivationType::kRELU); - engine_->SetITensor(op_desc.Output("Out")[0], layer->getOutput(0)); + auto output_name = op_desc.Output("Out")[0]; + engine_->SetITensor(output_name, layer->getOutput(0)); + if (test_mode) { // the test framework can not determine which is the + // output, so place the declaration inside. + engine_->DeclareOutput(output_name); + } } }; diff --git a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc index 86ca2ca08eb14265e1bfe7abd5eb6af5c83b8a5c..0a02a7bebf9efbd0555707e6cfa701ef1e7d9659 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc @@ -1,106 +1,47 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ #include -#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/inference/tensorrt/convert/io_converter.h" -#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/place.h" - -USE_OP(relu); +#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" namespace paddle { namespace inference { namespace tensorrt { -void Compare(const std::string op_type, float input, float expect) { +TEST(ReluOpConverter, main) { framework::Scope scope; - platform::CUDAPlace place; - platform::CUDADeviceContext ctx(place); - - // init fluid op and variable - auto x_var = scope.Var("X"); - auto x_tensor = x_var->GetMutable(); - x_tensor->Resize({1, 1}); - x_tensor->mutable_data(place); - std::vector init; - init.push_back(input); - framework::TensorFromVector(init, ctx, x_tensor); - - auto out_var = scope.Var("Out"); - auto out_tensor = out_var->GetMutable(); - out_tensor->Resize({1, 1}); - out_tensor->mutable_data(place); - - framework::OpDesc op_desc; - op_desc.SetType(op_type); - op_desc.SetInput("X", {"X"}); - op_desc.SetOutput("Out", {"Out"}); - - auto op = framework::OpRegistry::CreateOp(*op_desc.Proto()); - - // run fluid op - op->Run(scope, place); - // get fluid output - std::vector out1; - framework::TensorToVector(*out_tensor, ctx, &out1); - - // init tensorrt op - cudaStream_t stream; - ASSERT_EQ(0, cudaStreamCreate(&stream)); - TensorRTEngine* engine = new TensorRTEngine(1, 1 << 10, &stream); - engine->InitNetwork(); - engine->DeclareInput("X", nvinfer1::DataType::kFLOAT, - nvinfer1::DimsCHW{1, 1, 1}); - // convert op - OpConverter op_converter; - op_converter.ConvertOp(*op_desc.Proto(), engine); - - engine->DeclareOutput("Out"); - engine->FreezeNetwork(); - - // convert LoDTensor to ITensor - size_t size = x_tensor->memory_size(); - EngineIOConverter::ConvertInput(op_type, *x_tensor, - engine->buffer("X").buffer, size, &stream); - // run tensorrt Outp - engine->Execute(1); - // convert ITensor to LoDTensor - EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out").buffer, - out_tensor, size, &stream); - // get tensorrt output - std::vector out2; - framework::TensorToVector(*out_tensor, ctx, &out2); - - // compare - ASSERT_EQ(out1[0], out2[0]); - ASSERT_EQ(out1[0], expect); - - delete engine; - cudaStreamDestroy(stream); -} - -TEST(OpConverter, ConvertRelu) { - Compare("relu", 1, 1); // relu(1) = 1 - Compare("relu", -5, 0); // relu(-5) = 0 + std::unordered_set parameters; + TRTConvertValidation validator(10, parameters, scope, 1000); + validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6)); + validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6)); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("relu"); + desc.SetInput("X", {"relu-X"}); + desc.SetOutput("Out", {"relu-Out"}); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + + validator.Execute(10); } } // namespace tensorrt } // namespace inference } // namespace paddle -USE_OP(activation); +USE_OP(relu);