diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 748f5a084e8c880df215a60fe51c835ba5cd3110..c841510ae6e54164a913592bbd0e5720380fdb7b 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,6 +1,6 @@ # Add TRT tests nv_library(tensorrt_converter - SRCS mul_op.cc conv2d_op.cc fc_op.cc + SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc DEPS tensorrt_engine mul_op) nv_test(test_op_converter SRCS test_op_converter.cc DEPS @@ -13,3 +13,6 @@ nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL) nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine activation_op SERIAL) + +nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc + DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL) diff --git a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..11cad95361867476c6f775af778015da37f1cfb1 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc @@ -0,0 +1,80 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * Pool2dOp, IPoolingLayer in TRT. This layer doesn't have weights.
+ */
+class Pool2dOpConverter : public OpConverter {
+ public:
+  // Builds a TensorRT pooling layer from a fluid pool2d op description and
+  // binds the layer output to the op's "Out" variable name in engine_.
+  // When test_mode is true the output is also declared as an engine output.
+  // The scope argument is not used by this converter.
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    VLOG(4)
+        << "convert a fluid pool2d op to tensorrt pool2d layer without bias";
+    framework::OpDesc op_desc(op, nullptr);
+    // Declare inputs
+    PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
+    PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
+    auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
+    // d[1] and d[2] may be read below as the H and W extents, so the rank is
+    // checked before anything indexes the dimensions.
+    const nvinfer1::Dims input_dims = input1->getDimensions();
+    PADDLE_ENFORCE_EQ(input_dims.nbDims, 3UL);
+
+    const std::string pool_type =
+        boost::get<std::string>(op_desc.GetAttr("pooling_type"));
+    const std::vector<int> ksize =
+        boost::get<std::vector<int>>(op_desc.GetAttr("ksize"));
+    const std::vector<int> strides =
+        boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
+    const std::vector<int> paddings =
+        boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
+    // Elements 0 and 1 of each attribute are read below; fail loudly on a
+    // malformed op description instead of indexing past the end.
+    PADDLE_ENFORCE_GE(ksize.size(), 2UL);
+    PADDLE_ENFORCE_GE(strides.size(), 2UL);
+    PADDLE_ENFORCE_GE(paddings.size(), 2UL);
+
+    nvinfer1::DimsHW nv_ksize(ksize[0], ksize[1]);
+    const nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
+    nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
+    // global_pooling asks for a window covering the whole H x W plane with
+    // no padding (see fluid's pool2d operator). The attribute is treated as
+    // optional so op descriptions that never set it convert as before.
+    if (op_desc.HasAttr("global_pooling") &&
+        boost::get<bool>(op_desc.GetAttr("global_pooling"))) {
+      nv_ksize.d[0] = input_dims.d[1];
+      nv_ksize.d[1] = input_dims.d[2];
+      nv_paddings.d[0] = 0;
+      nv_paddings.d[1] = 0;
+    }
+
+    nvinfer1::PoolingType nv_pool_type = nvinfer1::PoolingType::kMAX;
+    if (pool_type == "max") {
+      nv_pool_type = nvinfer1::PoolingType::kMAX;
+    } else if (pool_type == "avg") {
+      nv_pool_type = nvinfer1::PoolingType::kAVERAGE;
+    } else {
+      PADDLE_THROW("TensorRT unsupported pooling type!");
+    }
+
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling,
+                                       *const_cast<nvinfer1::ITensor*>(input1),
+                                       nv_pool_type, nv_ksize);
+    PADDLE_ENFORCE_NOT_NULL(layer, "pool layer could not be created.");
+    layer->setStride(nv_strides);
+    layer->setPadding(nv_paddings);
+
+    auto output_name = op_desc.Output("Out")[0];
+    engine_->SetITensor(output_name, layer->getOutput(0));
+    if (test_mode) {
+      engine_->DeclareOutput(output_name);
+    }
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  //
namespace paddle + +USE_OP(pool2d); +REGISTER_TRT_OP_CONVERTER(pool2d, Pool2dOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc index 7dabfd9f6a9a8cfbdd1d9a66541180d3499b7bdc..e82762ea03ecd00bce7cfb83b130a3436ccbfed3 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc @@ -37,7 +37,7 @@ TEST(ReluOpConverter, main) { validator.SetOp(*desc.Proto()); LOG(INFO) << "execute"; - validator.Execute(1); + validator.Execute(5); } } // namespace tensorrt diff --git a/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc b/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc index 081f4d605975f1408d4d8a8ed3108c04d837a4de..1ae2668e733aad23241c63b9985e708396d0b1bc 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc @@ -24,9 +24,8 @@ TEST(fc_op, test) { std::unordered_set parameters({"mul-Y"}); framework::Scope scope; TRTConvertValidation validator(10, parameters, scope, 1000); - validator.DeclInputVar("mul-X", nvinfer1::Dims4(1, 10, 1, 1)); + validator.DeclInputVar("mul-X", nvinfer1::Dims3(10, 1, 1)); validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2)); - // validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2)); validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2)); // Prepare Op description @@ -38,7 +37,7 @@ TEST(fc_op, test) { validator.SetOp(*desc.Proto()); - validator.Execute(1); + validator.Execute(10); } } // namespace tensorrt diff --git a/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc b/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc index 674f37f2fdddf013a8f6f4671debbc19c3322423..3d34cd7d5d0deca4d83a3f5b5ed0fb396c6acd56 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc @@ -23,7 +23,7 @@ namespace tensorrt 
{ TEST(MulOpConverter, main) { framework::Scope scope; std::unordered_set parameters; - TRTConvertValidation validator(10, parameters, scope, 1000); + TRTConvertValidation validator(10, parameters, scope, 1000, false); validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6)); validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10)); validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10)); @@ -39,7 +39,7 @@ TEST(MulOpConverter, main) { validator.SetOp(*desc.Proto()); LOG(INFO) << "execute"; - validator.Execute(1); + validator.Execute(2); } } // namespace tensorrt diff --git a/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..c5dddbc8cd37b9fb1ba39382af2da5ad045f3af2 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc @@ -0,0 +1,60 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +TEST(Pool2dOpConverter, main) { + framework::Scope scope; + std::unordered_set parameters; + TRTConvertValidation validator(5, parameters, scope, 1 << 15); + + // The ITensor's Dims should not contain the batch size. 
+ // So, the ITensor's Dims of input and output should be C * H * W. + validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4)); + validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2)); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("pool2d"); + desc.SetInput("X", {"pool2d-X"}); + desc.SetOutput("Out", {"pool2d-Out"}); + + std::vector ksize({2, 2}); + std::vector strides({2, 2}); + std::vector paddings({0, 0}); + std::string pooling_t = "max"; + + desc.SetAttr("pooling_type", pooling_t); + desc.SetAttr("ksize", ksize); + desc.SetAttr("strides", strides); + desc.SetAttr("paddings", paddings); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + + validator.Execute(3); +} + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +USE_OP(pool2d); diff --git a/paddle/fluid/inference/tensorrt/convert/ut_helper.h b/paddle/fluid/inference/tensorrt/convert/ut_helper.h index f14885b238134cdf38a278cd8a0734947bcacfe0..39529cc2c799212f91107b1b86dd2c8c3642b6da 100644 --- a/paddle/fluid/inference/tensorrt/convert/ut_helper.h +++ b/paddle/fluid/inference/tensorrt/convert/ut_helper.h @@ -63,13 +63,16 @@ class TRTConvertValidation { public: TRTConvertValidation() = delete; - TRTConvertValidation(int batch_size, + TRTConvertValidation(int max_batch_size, const std::unordered_set& parameters, framework::Scope& scope, // NOLINT - int workspace_size = 1 << 10) - : parameters_(parameters), scope_(scope) { + int workspace_size = 1 << 10, bool if_add_batch = true) + : parameters_(parameters), + scope_(scope), + if_add_batch_(if_add_batch), + max_batch_size_(max_batch_size) { // create engine. 
- engine_.reset(new TensorRTEngine(batch_size, workspace_size, &stream_)); + engine_.reset(new TensorRTEngine(max_batch_size, workspace_size, &stream_)); engine_->InitNetwork(); PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0); @@ -84,7 +87,7 @@ class TRTConvertValidation { // Declare a parameter varaible in the scope. void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) { - DeclVar(name, dims); + DeclVar(name, dims, true); } void DeclOutputVar(const std::string& name, const nvinfer1::Dims& dims) { @@ -92,12 +95,18 @@ class TRTConvertValidation { } // Declare a variable in a fluid Scope. - void DeclVar(const std::string& name, const nvinfer1::Dims& dims) { + void DeclVar(const std::string& name, const nvinfer1::Dims& dims, + bool is_param = false) { platform::CPUPlace place; platform::CPUDeviceContext ctx(place); // Init Fluid tensor. std::vector dim_vec(dims.d, dims.d + dims.nbDims); + // There is no batchsize in ITensor's shape, but We should add it to + // tensor's shape of fluid. If the variable is not parameter and the + // if_add_batch_ flag is true, add the max batchsize to dim_vec. 
+ if (is_param != true && if_add_batch_ == true) + dim_vec.insert(dim_vec.begin(), max_batch_size_); auto* x = scope_.Var(name); auto* x_tensor = x->GetMutable(); x_tensor->Resize(framework::make_ddim(dim_vec)); @@ -131,6 +140,7 @@ class TRTConvertValidation { void Execute(int batch_size) { // Execute Fluid Op + PADDLE_ENFORCE_LE(batch_size, max_batch_size_); platform::CPUPlace place; platform::CPUDeviceContext ctx(place); op_->Run(scope_, place); @@ -149,9 +159,15 @@ class TRTConvertValidation { auto* var = scope_.FindVar(output); auto tensor = var->GetMutable(); framework::TensorToVector(*tensor, ctx, &fluid_out); + + size_t fluid_out_size = fluid_out.size(); + if (if_add_batch_ == true) { + fluid_out_size = + batch_size * (framework::product(tensor->dims()) / max_batch_size_); + } // Compare two output ASSERT_FALSE(fluid_out.empty()); - for (size_t i = 0; i < fluid_out.size(); i++) { + for (size_t i = 0; i < fluid_out_size; i++) { // Loose the threshold for CI in different machine model. EXPECT_LT(std::abs(fluid_out[i] - trt_out[i]), 2e-5); } @@ -167,6 +183,12 @@ class TRTConvertValidation { std::unique_ptr op_desc_; const std::unordered_set& parameters_; framework::Scope& scope_; + // The ITensor of trt does not contain the batch size, + // but, in most cases, we need to set batch size for + // fluid's tensor shape. This variable indicates + // whether to add batch size to tensor shape of fluid.
+ bool if_add_batch_; + int max_batch_size_; }; } // namespace tensorrt diff --git a/paddle/fluid/inference/tensorrt/test_engine.cc b/paddle/fluid/inference/tensorrt/test_engine.cc index f8732e51b66bdc78aa35d06ba9651f1942a74b01..dc03702990587bf5e65d28da662d10df4d882110 100644 --- a/paddle/fluid/inference/tensorrt/test_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_engine.cc @@ -113,7 +113,7 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) { ASSERT_EQ(y_cpu[1], 14.5); } -TEST_F(TensorRTEngineTest, test_conv2d_temp) { +TEST_F(TensorRTEngineTest, test_conv2d) { // Weight in CPU memory. float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; float raw_bias[1] = {0}; @@ -146,6 +146,37 @@ TEST_F(TensorRTEngineTest, test_conv2d_temp) { ASSERT_EQ(y_cpu[1], 6.0); } +TEST_F(TensorRTEngineTest, test_pool2d) { + // Weight in CPU memory. + auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT, + nvinfer1::Dims3{1, 2, 2}); + + nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kAVERAGE; + auto* pool_layer = + TRT_ENGINE_ADD_LAYER(engine_, Pooling, *const_cast(x), + pool_t, nvinfer1::DimsHW{2, 2}); + + PADDLE_ENFORCE(pool_layer != nullptr); + pool_layer->setStride(nvinfer1::DimsHW{1, 1}); + pool_layer->setPadding(nvinfer1::DimsHW{0, 0}); + + engine_->DeclareOutput(pool_layer, 0, "y"); + engine_->FreezeNetwork(); + ASSERT_EQ(engine_->engine()->getNbBindings(), 2); + + float x_v[8] = {1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 5.0, 10.0}; + engine_->SetInputFromCPU("x", reinterpret_cast(&x_v), + 8 * sizeof(float)); + engine_->Execute(2); + + LOG(INFO) << "to get output"; + float* y_cpu = new float[2]; + engine_->GetOutputInCPU("y", &y_cpu[0], 2 * sizeof(float)); + + ASSERT_EQ(y_cpu[0], 2.0); + ASSERT_EQ(y_cpu[1], 5.0); +} + } // namespace tensorrt } // namespace inference } // namespace paddle