test_engine.cc 8.2 KB
Newer Older
Y
Yan Chunwei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <glog/logging.h>
#include <gtest/gtest.h>

N
nhzlx 已提交
18
#include "paddle/fluid/framework/tensor.h"
19
#include "paddle/fluid/inference/tensorrt/engine.h"
Y
Yan Chunwei 已提交
20 21 22 23 24 25 26 27 28
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace inference {
namespace tensorrt {

class TensorRTEngineTest : public ::testing::Test {
 protected:
  void SetUp() override {
N
nhzlx 已提交
29 30
    ctx_ = new platform::CUDADeviceContext(platform::CUDAPlace(0));

31
    engine_ = new TensorRTEngine(10, 1 << 10);
Y
Yan Chunwei 已提交
32 33 34
    engine_->InitNetwork();
  }

N
nhzlx 已提交
35 36 37 38 39 40
  void TearDown() override {
    if (engine_) {
      delete engine_;
      engine_ = nullptr;
    }
  }
N
nhzlx 已提交
41 42 43 44 45 46 47 48 49

  void PrepareInputOutput(const std::vector<float> &input,
                          std::vector<int> output_shape) {
    TensorFromVector(input, *ctx_, &input_);
    output_.Resize(framework::make_ddim(output_shape));
  }

  void GetOutput(std::vector<float> *output) {
    TensorToVector(output_, *ctx_, output);
Y
Yan Chunwei 已提交
50 51 52
  }

 protected:
N
nhzlx 已提交
53 54 55 56
  framework::Tensor input_;
  framework::Tensor output_;
  TensorRTEngine *engine_;
  platform::CUDADeviceContext *ctx_;
Y
Yan Chunwei 已提交
57 58 59 60 61 62 63 64
};

// Builds a network with a single fully connected layer holding one weight (2)
// and one bias (3), runs it on one input value and checks y == 2 * x + 3.
// Along the way it exercises the engine's attribute API
// (Set / Has / Get / Erase / SetNotOwned).
TEST_F(TensorRTEngineTest, add_layer) {
  const int size = 1;

  float raw_weight[size] = {2.};  // Weight in CPU memory.
  float raw_bias[size] = {3.};

  std::vector<void *> buffers(2);  // TRT bindings: [0] input, [1] output.

  LOG(INFO) << "create weights";
  TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, raw_weight, size);
  TensorRTEngine::Weight bias(nvinfer1::DataType::kFLOAT, raw_bias, size);
  auto *x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
                                  nvinfer1::Dims3{1, 1, 1});
  auto *fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, size,
                                        weight.get(), bias.get());
  PADDLE_ENFORCE_NOT_NULL(fc_layer,
                          platform::errors::InvalidArgument(
                              "TRT fully connected layer building failed."));

  engine_->DeclareOutput(fc_layer, 0, "y");
  LOG(INFO) << "freeze network";
  engine_->FreezeNetwork();
  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);

  // Fill in real data.
  std::vector<float> x_v = {1234};
  std::vector<float> y_cpu;
  PrepareInputOutput(x_v, {1});

  auto *x_v_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
  auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());

  // float* converts to void* implicitly; no cast is required.
  buffers[0] = x_v_gpu_data;
  buffers[1] = y_gpu_data;

  LOG(INFO) << "Set attr";
  // NOTE(review): Set() presumably takes ownership of the pointer (in
  // contrast to SetNotOwned() below) -- confirm against engine.h.
  engine_->Set("test_attr", new std::string("test_attr"));
  if (engine_->Has("test_attr")) {
    auto attr_val = engine_->Get<std::string>("test_attr");
    engine_->Erase("test_attr");
  }
  // The not-owned attribute lives on the stack so it cannot leak when one of
  // the ASSERT_* macros below returns early. As before, the engine keeps the
  // (then stale) pointer until TearDown() destroys it.
  std::string attr_key("attr_key");
  engine_->SetNotOwned("attr1", &attr_key);

  LOG(INFO) << "to execute";
  engine_->Execute(1, &buffers, ctx_->stream());

  LOG(INFO) << "to get output";
  GetOutput(&y_cpu);

  LOG(INFO) << "to checkout output";
  // Guard the element access, then compare with a float-aware assertion.
  ASSERT_FALSE(y_cpu.empty());
  ASSERT_FLOAT_EQ(y_cpu[0], x_v[0] * 2 + 3);
}

X
Xin Pan 已提交
115 116 117 118 119 120
// Builds a fully connected layer with two inputs and two outputs, runs it on
// x = (1, 2) and checks both the output tensor's dimensions and its values.
TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
  // Weight in CPU memory.
  // It seems tensorrt FC use col-major: [[1.0, 3.3], [1.1, 4.4]]
  // instead of row-major, which is [[1.0, 1.1], [3.3, 4.4]]
  float raw_weight[4] = {1.0, 1.1, 3.3, 4.4};
  float raw_bias[2] = {1.3, 2.4};
  std::vector<void *> buffers(2);  // TRT bindings: [0] input, [1] output.

  TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, raw_weight, 4);
  TensorRTEngine::Weight bias(nvinfer1::DataType::kFLOAT, raw_bias, 2);
  auto *x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
                                  nvinfer1::Dims3{1, 2, 1});
  auto *fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, 2,
                                        weight.get(), bias.get());
  PADDLE_ENFORCE_NOT_NULL(fc_layer,
                          platform::errors::InvalidArgument(
                              "TRT fully connected layer building failed."));

  engine_->DeclareOutput(fc_layer, 0, "y");
  engine_->FreezeNetwork();
  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);

  // Fill in real data.
  std::vector<float> x_v = {1.0, 2.0};
  std::vector<float> y_cpu;
  PrepareInputOutput(x_v, {2});

  auto *x_v_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
  auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());

  // float* converts to void* implicitly; no cast is required.
  buffers[0] = x_v_gpu_data;
  buffers[1] = y_gpu_data;

  engine_->Execute(1, &buffers, ctx_->stream());

  LOG(INFO) << "to get output";
  GetOutput(&y_cpu);

  auto dims = engine_->GetITensor("y")->getDimensions();
  ASSERT_EQ(dims.nbDims, 3);
  ASSERT_EQ(dims.d[0], 2);
  ASSERT_EQ(dims.d[1], 1);

  // y[0] = 1.0 * 1 + 1.1 * 2 + 1.3 = 4.5
  // y[1] = 3.3 * 1 + 4.4 * 2 + 2.4 = 14.5
  // The operands are not exactly representable in binary floating point, so
  // compare with a float-aware assertion instead of exact equality, and make
  // sure both elements exist before reading them.
  ASSERT_GE(y_cpu.size(), 2U);
  ASSERT_FLOAT_EQ(y_cpu[0], 4.5f);
  ASSERT_FLOAT_EQ(y_cpu[1], 14.5f);
}

162
// Builds a single convolution layer (one output map, 3x3 all-ones kernel,
// zero bias, stride 1, padding 1), runs it on a batch of two all-ones 1x3x3
// inputs and checks the first two output values.
TEST_F(TensorRTEngineTest, test_conv2d) {
  // Weight in CPU memory.
  float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
  float raw_bias[1] = {0};
  std::vector<void *> buffers(2);  // TRT bindings: [0] input, [1] output.

  TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, raw_weight, 9);
  TensorRTEngine::Weight bias(nvinfer1::DataType::kFLOAT, raw_bias, 1);
  auto *x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
                                  nvinfer1::Dims3{1, 3, 3});
  auto *conv_layer =
      TRT_ENGINE_ADD_LAYER(engine_, Convolution, *x, 1, nvinfer1::DimsHW{3, 3},
                           weight.get(), bias.get());
  PADDLE_ENFORCE_NOT_NULL(conv_layer,
                          platform::errors::InvalidArgument(
                              "TRT convolution layer building failed."));
  conv_layer->setStride(nvinfer1::DimsHW{1, 1});
  conv_layer->setPadding(nvinfer1::DimsHW{1, 1});

  engine_->DeclareOutput(conv_layer, 0, "y");
  engine_->FreezeNetwork();
  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);

  // Fill in real data: 2 samples x 9 values each.
  std::vector<float> x_v = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
  std::vector<float> y_cpu;
  PrepareInputOutput(x_v, {18});

  auto *x_v_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
  auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());

  // float* converts to void* implicitly; no cast is required.
  buffers[0] = x_v_gpu_data;
  buffers[1] = y_gpu_data;

  engine_->Execute(2, &buffers, ctx_->stream());

  LOG(INFO) << "to get output";
  GetOutput(&y_cpu);

  // With padding 1 the corner output overlaps a 2x2 patch of ones (-> 4) and
  // its neighbour on the edge overlaps a 2x3 patch (-> 6). Check that the
  // elements exist before reading them and use a float-aware comparison.
  ASSERT_GE(y_cpu.size(), 2U);
  ASSERT_FLOAT_EQ(y_cpu[0], 4.0f);
  ASSERT_FLOAT_EQ(y_cpu[1], 6.0f);
}

206 207
// Builds a single average-pooling layer (2x2 window, stride 1, no padding),
// runs it on a batch of two 1x2x2 inputs and checks that each sample is
// reduced to the mean of its four values.
TEST_F(TensorRTEngineTest, test_pool2d) {
  auto *x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
                                  nvinfer1::Dims3{1, 2, 2});

  std::vector<void *> buffers(2);  // TRT bindings: [0] input, [1] output.
  nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kAVERAGE;
  auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *x, pool_t,
                                          nvinfer1::DimsHW{2, 2});

  PADDLE_ENFORCE_NOT_NULL(
      pool_layer,
      platform::errors::InvalidArgument("TRT pooling layer building failed."));
  pool_layer->setStride(nvinfer1::DimsHW{1, 1});
  pool_layer->setPadding(nvinfer1::DimsHW{0, 0});

  engine_->DeclareOutput(pool_layer, 0, "y");
  engine_->FreezeNetwork();
  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);

  // Fill in real data: 2 samples x 4 values each.
  std::vector<float> x_v = {1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 5.0, 10.0};
  std::vector<float> y_cpu;
  PrepareInputOutput(x_v, {2});

  auto *x_v_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
  auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());

  // float* converts to void* implicitly; no cast is required.
  buffers[0] = x_v_gpu_data;
  buffers[1] = y_gpu_data;

  engine_->Execute(2, &buffers, ctx_->stream());

  LOG(INFO) << "to get output";
  GetOutput(&y_cpu);

  // mean(1, 2, 5, 0) = 2 and mean(2, 3, 5, 10) = 5. Check that the elements
  // exist before reading them and use a float-aware comparison.
  ASSERT_GE(y_cpu.size(), 2U);
  ASSERT_FLOAT_EQ(y_cpu[0], 2.0f);
  ASSERT_FLOAT_EQ(y_cpu[1], 5.0f);
}

Y
Yan Chunwei 已提交
246 247 248
}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle