diff --git a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc index ea282a79d5c0b4f1f3c5463d30b661840f6b6b00..2db551758c87e86faa533a236690f2d6496deb26 100644 --- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc @@ -43,6 +43,8 @@ class Pool2dOpConverter : public OpConverter { const nvinfer1::DimsHW nv_strides(strides[0], strides[1]); const nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]); + PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL); + nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kMAX; if (pool_type == "max") { pool_t = nvinfer1::PoolingType::kMAX; diff --git a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc index 7dabfd9f6a9a8cfbdd1d9a66541180d3499b7bdc..5a1543de8118b2856e634e0ca7ed706b99e84c53 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc @@ -23,7 +23,8 @@ namespace tensorrt { TEST(ReluOpConverter, main) { framework::Scope scope; std::unordered_set parameters; - TRTConvertValidation validator(10, parameters, scope, 1000); + int runtime_batch = 3; + TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch); validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6)); validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6)); @@ -37,7 +38,7 @@ TEST(ReluOpConverter, main) { validator.SetOp(*desc.Proto()); LOG(INFO) << "execute"; - validator.Execute(1); + validator.Execute(runtime_batch); } } // namespace tensorrt diff --git a/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc b/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc index 081f4d605975f1408d4d8a8ed3108c04d837a4de..0c621083326cea34dbc947830b6198818aad8bca 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc @@ -23,10 +23,10 @@ namespace tensorrt { TEST(fc_op, test) { std::unordered_set parameters({"mul-Y"}); framework::Scope scope; - TRTConvertValidation validator(10, parameters, scope, 1000); - validator.DeclInputVar("mul-X", nvinfer1::Dims4(1, 10, 1, 1)); + int runtime_batch = 2; + TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch); + validator.DeclInputVar("mul-X", nvinfer1::Dims3(10, 1, 1)); validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2)); - // validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2)); validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2)); // Prepare Op description @@ -38,7 +38,7 @@ TEST(fc_op, test) { validator.SetOp(*desc.Proto()); - validator.Execute(1); + validator.Execute(runtime_batch); } } // namespace tensorrt diff --git a/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc b/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc index 674f37f2fdddf013a8f6f4671debbc19c3322423..a54870566a4d2559e704fee925d736cd3fd4963d 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc @@ -23,7 +23,8 @@ namespace tensorrt { TEST(MulOpConverter, main) { framework::Scope scope; std::unordered_set parameters; - TRTConvertValidation validator(10, parameters, scope, 1000); + int runtime_batch = 0; + TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch); validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6)); validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10)); validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10)); diff --git a/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc index 261eb2c6ceaf5d276bebb0a0634ca9c8e67cfd5d..57c982f2c029af4a694716532c699f50f9f64458 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc @@ -23,9 +23,14 @@ namespace tensorrt { TEST(Pool2dOpConverter, main) { framework::Scope scope; std::unordered_set parameters; - TRTConvertValidation validator(10, parameters, scope, 1000); - validator.DeclInputVar("pool2d-X", nvinfer1::Dims4(10, 3, 2, 2)); - validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims4(10, 3, 1, 1)); + int runtime_batch = 3; + TRTConvertValidation validator(5, parameters, scope, 1 << 15, runtime_batch); + + // We have already set the runtime batchsize, so the + // Dims should not contain the batch size. + // The ITensor's Dims of input and output should be C * H * W. + validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4)); + validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2)); // Prepare Op description framework::OpDesc desc; @@ -34,7 +39,7 @@ TEST(Pool2dOpConverter, main) { desc.SetOutput("Out", {"pool2d-Out"}); std::vector ksize({2, 2}); - std::vector strides({1, 1}); + std::vector strides({2, 2}); std::vector paddings({0, 0}); std::string pooling_t = "max"; @@ -42,18 +47,12 @@ TEST(Pool2dOpConverter, main) { desc.SetAttr("ksize", ksize); desc.SetAttr("strides", strides); desc.SetAttr("paddings", paddings); - // std::string temp = ""; - // (*desc.Proto()).SerializeToString(&temp); - - // std::cout << temp << std::endl; - // std::ofstream f("__temp__", std::ios::out); - // f << temp; LOG(INFO) << "set OP"; validator.SetOp(*desc.Proto()); LOG(INFO) << "execute"; - validator.Execute(10); + validator.Execute(runtime_batch); } } // namespace tensorrt diff --git a/paddle/fluid/inference/tensorrt/convert/ut_helper.h b/paddle/fluid/inference/tensorrt/convert/ut_helper.h index f14885b238134cdf38a278cd8a0734947bcacfe0..e20169c4cff0999b795e495215aec89a3574033d 100644 --- a/paddle/fluid/inference/tensorrt/convert/ut_helper.h +++ b/paddle/fluid/inference/tensorrt/convert/ut_helper.h @@ -63,13 +63,15 @@ class TRTConvertValidation { public: TRTConvertValidation() = delete; - TRTConvertValidation(int batch_size, + TRTConvertValidation(int max_batch_size, const std::unordered_set& parameters, framework::Scope& scope, // NOLINT - int workspace_size = 1 << 10) - : parameters_(parameters), scope_(scope) { + int workspace_size = 1 << 10, int runtime_batch_size = 1) + : parameters_(parameters), + scope_(scope), + runtime_batch_size_(runtime_batch_size) { // create engine. - engine_.reset(new TensorRTEngine(batch_size, workspace_size, &stream_)); + engine_.reset(new TensorRTEngine(max_batch_size, workspace_size, &stream_)); engine_->InitNetwork(); PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0); @@ -84,7 +86,7 @@ class TRTConvertValidation { // Declare a parameter varaible in the scope. void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) { - DeclVar(name, dims); + DeclVar(name, dims, true); } void DeclOutputVar(const std::string& name, const nvinfer1::Dims& dims) { @@ -92,12 +94,20 @@ class TRTConvertValidation { } // Declare a variable in a fluid Scope. - void DeclVar(const std::string& name, const nvinfer1::Dims& dims) { + void DeclVar(const std::string& name, const nvinfer1::Dims& dims, + bool is_param = false) { platform::CPUPlace place; platform::CPUDeviceContext ctx(place); // Init Fluid tensor. std::vector dim_vec(dims.d, dims.d + dims.nbDims); + // There is no batchsize in ITensor's shape, but We should add it to + // tensor's + // shape of fluid. If the variable is not parameter and the batch size + // greater than 0, + // add the batchsize to dim_vec. + if (is_param != true && runtime_batch_size_ > 0) + dim_vec.insert(dim_vec.begin(), runtime_batch_size_); auto* x = scope_.Var(name); auto* x_tensor = x->GetMutable(); x_tensor->Resize(framework::make_ddim(dim_vec)); @@ -167,6 +177,10 @@ class TRTConvertValidation { std::unique_ptr op_desc_; const std::unordered_set& parameters_; framework::Scope& scope_; + // It represents the runtime batchsize when we test. + // If the value greater than 0, we add this to + // the first dimension of tensor's shape of fluid. + int runtime_batch_size_; }; } // namespace tensorrt diff --git a/paddle/fluid/inference/tensorrt/test_engine.cc b/paddle/fluid/inference/tensorrt/test_engine.cc index f8732e51b66bdc78aa35d06ba9651f1942a74b01..dc03702990587bf5e65d28da662d10df4d882110 100644 --- a/paddle/fluid/inference/tensorrt/test_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_engine.cc @@ -113,7 +113,7 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) { ASSERT_EQ(y_cpu[1], 14.5); } -TEST_F(TensorRTEngineTest, test_conv2d_temp) { +TEST_F(TensorRTEngineTest, test_conv2d) { // Weight in CPU memory. float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; float raw_bias[1] = {0}; @@ -146,6 +146,37 @@ TEST_F(TensorRTEngineTest, test_conv2d_temp) { ASSERT_EQ(y_cpu[1], 6.0); } +TEST_F(TensorRTEngineTest, test_pool2d) { + // Weight in CPU memory. + auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT, + nvinfer1::Dims3{1, 2, 2}); + + nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kAVERAGE; + auto* pool_layer = + TRT_ENGINE_ADD_LAYER(engine_, Pooling, *const_cast(x), + pool_t, nvinfer1::DimsHW{2, 2}); + + PADDLE_ENFORCE(pool_layer != nullptr); + pool_layer->setStride(nvinfer1::DimsHW{1, 1}); + pool_layer->setPadding(nvinfer1::DimsHW{0, 0}); + + engine_->DeclareOutput(pool_layer, 0, "y"); + engine_->FreezeNetwork(); + ASSERT_EQ(engine_->engine()->getNbBindings(), 2); + + float x_v[8] = {1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 5.0, 10.0}; + engine_->SetInputFromCPU("x", reinterpret_cast(&x_v), + 8 * sizeof(float)); + engine_->Execute(2); + + LOG(INFO) << "to get output"; + float* y_cpu = new float[2]; + engine_->GetOutputInCPU("y", &y_cpu[0], 2 * sizeof(float)); + + ASSERT_EQ(y_cpu[0], 2.0); + ASSERT_EQ(y_cpu[1], 5.0); +} + } // namespace tensorrt } // namespace inference } // namespace paddle