1. Support multi-batch unit tests 2. Support pool op

01566fb6 · nhzlx · 21890ca0 · 01566fb6 · 01566fb6 · 01566fb6
7 changed files
--- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
@@ -43,6 +43,8 @@ class Pool2dOpConverter : public OpConverter {
    const nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
    const nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);

+    PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL);
+
    nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kMAX;
    if (pool_type == "max") {
      pool_t = nvinfer1::PoolingType::kMAX;

--- a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
@@ -23,7 +23,8 @@ namespace tensorrt {
 TEST(ReluOpConverter, main) {
  framework::Scope scope;
  std::unordered_set<std::string> parameters;
-  TRTConvertValidation validator(10, parameters, scope, 1000);
+  int runtime_batch = 3;
+  TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
  validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6));
  validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6));

@@ -37,7 +38,7 @@ TEST(ReluOpConverter, main) {
  validator.SetOp(*desc.Proto());
  LOG(INFO) << "execute";

-  validator.Execute(1);
+  validator.Execute(runtime_batch);
 }

 }  // namespace tensorrt

--- a/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
@@ -23,10 +23,10 @@ namespace tensorrt {
 TEST(fc_op, test) {
  std::unordered_set<std::string> parameters({"mul-Y"});
  framework::Scope scope;
-  TRTConvertValidation validator(10, parameters, scope, 1000);
-  validator.DeclInputVar("mul-X", nvinfer1::Dims4(1, 10, 1, 1));
+  int runtime_batch = 2;
+  TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
+  validator.DeclInputVar("mul-X", nvinfer1::Dims3(10, 1, 1));
  validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2));
-  // validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2));
  validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2));

  // Prepare Op description
@@ -38,7 +38,7 @@ TEST(fc_op, test) {

  validator.SetOp(*desc.Proto());

-  validator.Execute(1);
+  validator.Execute(runtime_batch);
 }

 }  // namespace tensorrt

--- a/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
@@ -23,7 +23,8 @@ namespace tensorrt {
 TEST(MulOpConverter, main) {
  framework::Scope scope;
  std::unordered_set<std::string> parameters;
-  TRTConvertValidation validator(10, parameters, scope, 1000);
+  int runtime_batch = 0;
+  TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
  validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6));
  validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10));
  validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10));

--- a/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc
@@ -23,9 +23,14 @@ namespace tensorrt {
 TEST(Pool2dOpConverter, main) {
  framework::Scope scope;
  std::unordered_set<std::string> parameters;
-  TRTConvertValidation validator(10, parameters, scope, 1000);
-  validator.DeclInputVar("pool2d-X", nvinfer1::Dims4(10, 3, 2, 2));
-  validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims4(10, 3, 1, 1));
+  int runtime_batch = 3;
+  TRTConvertValidation validator(5, parameters, scope, 1 << 15, runtime_batch);
+
+  // We have already set the runtime batchsize, so the
+  // Dims should not contain the batch size.
+  // The ITensor's Dims of input and output should be C * H * W.
+  validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4));
+  validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2));

  // Prepare Op description
  framework::OpDesc desc;
@@ -34,7 +39,7 @@ TEST(Pool2dOpConverter, main) {
  desc.SetOutput("Out", {"pool2d-Out"});

  std::vector<int> ksize({2, 2});
-  std::vector<int> strides({1, 1});
+  std::vector<int> strides({2, 2});
  std::vector<int> paddings({0, 0});
  std::string pooling_t = "max";

@@ -42,18 +47,12 @@ TEST(Pool2dOpConverter, main) {
  desc.SetAttr("ksize", ksize);
  desc.SetAttr("strides", strides);
  desc.SetAttr("paddings", paddings);
-  // std::string temp = "";
-  // (*desc.Proto()).SerializeToString(&temp);
-
-  // std::cout << temp << std::endl;
-  // std::ofstream f("__temp__", std::ios::out);
-  // f << temp;

  LOG(INFO) << "set OP";
  validator.SetOp(*desc.Proto());
  LOG(INFO) << "execute";

-  validator.Execute(10);
+  validator.Execute(runtime_batch);
 }

 }  // namespace tensorrt

--- a/paddle/fluid/inference/tensorrt/convert/ut_helper.h
+++ b/paddle/fluid/inference/tensorrt/convert/ut_helper.h
@@ -63,13 +63,15 @@ class TRTConvertValidation {
 public:
  TRTConvertValidation() = delete;

-  TRTConvertValidation(int batch_size,
+  TRTConvertValidation(int max_batch_size,
                       const std::unordered_set<std::string>& parameters,
                       framework::Scope& scope,  // NOLINT
-                       int workspace_size = 1 << 10)
-      : parameters_(parameters), scope_(scope) {
+                       int workspace_size = 1 << 10, int runtime_batch_size = 1)
+      : parameters_(parameters),
+        scope_(scope),
+        runtime_batch_size_(runtime_batch_size) {
    // create engine.
-    engine_.reset(new TensorRTEngine(batch_size, workspace_size, &stream_));
+    engine_.reset(new TensorRTEngine(max_batch_size, workspace_size, &stream_));
    engine_->InitNetwork();

    PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
@@ -84,7 +86,7 @@ class TRTConvertValidation {

  // Declare a parameter variable in the scope.
  void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) {
-    DeclVar(name, dims);
+    DeclVar(name, dims, true);
  }

  void DeclOutputVar(const std::string& name, const nvinfer1::Dims& dims) {
@@ -92,12 +94,20 @@ class TRTConvertValidation {
  }

  // Declare a variable in a fluid Scope.
-  void DeclVar(const std::string& name, const nvinfer1::Dims& dims) {
+  void DeclVar(const std::string& name, const nvinfer1::Dims& dims,
+               bool is_param = false) {
    platform::CPUPlace place;
    platform::CPUDeviceContext ctx(place);

    // Init Fluid tensor.
    std::vector<int> dim_vec(dims.d, dims.d + dims.nbDims);
+    // An ITensor's shape does not include the batch size, but the fluid
+    // tensor's shape must. So when the variable is not a parameter and
+    // the runtime batch size is greater than 0, insert the batch size at
+    // the front of dim_vec. Parameters keep exactly the shape passed in
+    // dims.
+    if (is_param != true && runtime_batch_size_ > 0)
+      dim_vec.insert(dim_vec.begin(), runtime_batch_size_);
    auto* x = scope_.Var(name);
    auto* x_tensor = x->GetMutable<framework::LoDTensor>();
    x_tensor->Resize(framework::make_ddim(dim_vec));
@@ -167,6 +177,10 @@ class TRTConvertValidation {
  std::unique_ptr<framework::OpDesc> op_desc_;
  const std::unordered_set<std::string>& parameters_;
  framework::Scope& scope_;
+  // Runtime batch size used when running the test.
+  // If the value is greater than 0, it is added as the first dimension
+  // of the fluid tensor's shape.
+  int runtime_batch_size_;
 };

 }  // namespace tensorrt

--- a/paddle/fluid/inference/tensorrt/test_engine.cc
+++ b/paddle/fluid/inference/tensorrt/test_engine.cc
@@ -113,7 +113,7 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
  ASSERT_EQ(y_cpu[1], 14.5);
 }

-TEST_F(TensorRTEngineTest, test_conv2d_temp) {
+TEST_F(TensorRTEngineTest, test_conv2d) {
  // Weight in CPU memory.
  float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
  float raw_bias[1] = {0};
@@ -146,6 +146,37 @@ TEST_F(TensorRTEngineTest, test_conv2d_temp) {
  ASSERT_EQ(y_cpu[1], 6.0);
 }

+TEST_F(TensorRTEngineTest, test_pool2d) {
+  // Average-pool a 1x2x2 input with a 2x2 window, running a batch of 2.
+  auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
+                                  nvinfer1::Dims3{1, 2, 2});
+
+  nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kAVERAGE;
+  auto* pool_layer =
+      TRT_ENGINE_ADD_LAYER(engine_, Pooling, *const_cast<nvinfer1::ITensor*>(x),
+                           pool_t, nvinfer1::DimsHW{2, 2});
+
+  PADDLE_ENFORCE(pool_layer != nullptr);
+  pool_layer->setStride(nvinfer1::DimsHW{1, 1});
+  pool_layer->setPadding(nvinfer1::DimsHW{0, 0});
+
+  engine_->DeclareOutput(pool_layer, 0, "y");
+  engine_->FreezeNetwork();
+  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);
+
+  float x_v[8] = {1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 5.0, 10.0};  // two 2x2 samples
+  engine_->SetInputFromCPU("x", reinterpret_cast<void*>(&x_v),
+                           8 * sizeof(float));
+  engine_->Execute(2);
+
+  LOG(INFO) << "to get output";
+  float y_cpu[2] = {0.0f, 0.0f};  // stack buffer: nothing to free on exit
+  engine_->GetOutputInCPU("y", &y_cpu[0], 2 * sizeof(float));
+
+  ASSERT_EQ(y_cpu[0], 2.0);  // mean(1, 2, 5, 0)
+  ASSERT_EQ(y_cpu[1], 5.0);  // mean(2, 3, 5, 10)
+}
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle