/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <glog/logging.h>
#include <gtest/gtest.h>

#include <thread>  // NOLINT

#include "gflags/gflags.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/tests/test_helper.h"

#ifdef __clang__
#define ACC_DIFF 4e-3
#else
#define ACC_DIFF 1e-3
#endif

DEFINE_string(word2vec_dirname, "",
              "Directory of the word2vec inference model.");
DEFINE_string(book_dirname, "", "Directory of the book inference model.");

namespace paddle {

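// Wraps the underlying buffer of a LoDTensor in a PaddleTensor without
// copying. Only INT64, FP32, and INT32 element types are handled; anything
// else throws.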
PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
  PaddleTensor pt;

  if (t->type() == framework::proto::VarType::INT64) {
    pt.data.Reset(t->data<void>(), t->numel() * sizeof(int64_t));
    pt.dtype = PaddleDType::INT64;
  } else if (t->type() == framework::proto::VarType::FP32) {
    pt.data.Reset(t->data<void>(), t->numel() * sizeof(float));
    pt.dtype = PaddleDType::FLOAT32;
  } else if (t->type() == framework::proto::VarType::INT32) {
    pt.data.Reset(t->data<void>(), t->numel() * sizeof(int32_t));
    pt.dtype = PaddleDType::INT32;
  } else {
    PADDLE_THROW(platform::errors::Unimplemented(
        "Unsupported tensor data type. Now only supports INT64, FP32, INT32."));
  }
  pt.shape = framework::vectorize<int>(t->dims());
  return pt;
}

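// Builds the NativeConfig shared by the tests below: the word2vec model
// directory from the command line, a small GPU memory fraction, and device 0.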
NativeConfig GetConfig() {
  NativeConfig config;
  config.model_dir = FLAGS_word2vec_dirname;
  LOG(INFO) << "dirname  " << config.model_dir;
  config.fraction_of_gpu_memory = 0.15;
  config.device = 0;
  return config;
}

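// Feeds four random word ids to the word2vec model through the native
// predictor API and cross-checks the output against the reference result
// produced by TestInference on CPU.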
void MainWord2Vec(const paddle::PaddlePlace& place) {
  NativeConfig config = GetConfig();
  // Set the placement flags before creating the predictor; flags changed
  // after construction have no effect on an existing predictor.
  config.use_gpu = paddle::gpu_place_used(place);
  config.use_xpu = paddle::xpu_place_used(place);
  auto predictor = CreatePaddlePredictor<NativeConfig>(config);

  framework::LoDTensor first_word, second_word, third_word, fourth_word;
  framework::LoD lod{{0, 1}};
  int64_t dict_size = 2073;  // The size of the dictionary

  SetupLoDTensor(&first_word, lod, static_cast<int64_t>(0), dict_size - 1);
  SetupLoDTensor(&second_word, lod, static_cast<int64_t>(0), dict_size - 1);
  SetupLoDTensor(&third_word, lod, static_cast<int64_t>(0), dict_size - 1);
  SetupLoDTensor(&fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);

  std::vector<PaddleTensor> paddle_tensor_feeds;
  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&first_word));
  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&second_word));
  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&third_word));
  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&fourth_word));

  std::vector<PaddleTensor> outputs;
  ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
  ASSERT_EQ(outputs.size(), 1UL);
  size_t len = outputs[0].data.length();
  float* data = static_cast<float*>(outputs[0].data.data());
  for (size_t j = 0; j < len / sizeof(float); ++j) {
    ASSERT_LT(data[j], 1.0);
    ASSERT_GT(data[j], -1.0);
  }

  std::vector<paddle::framework::LoDTensor*> cpu_feeds;
  cpu_feeds.push_back(&first_word);
  cpu_feeds.push_back(&second_word);
  cpu_feeds.push_back(&third_word);
  cpu_feeds.push_back(&fourth_word);

  framework::FetchType output1;
  std::vector<paddle::framework::FetchType*> cpu_fetchs1;
  cpu_fetchs1.push_back(&output1);

  TestInference<platform::CPUPlace>(config.model_dir, cpu_feeds, cpu_fetchs1);

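  // FetchType is a variant; extract the LoDTensor that holds the
  // reference output.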
  auto output1_tensor = BOOST_GET(paddle::framework::LoDTensor, output1);
  float* lod_data = output1_tensor.data<float>();
  for (int i = 0; i < output1_tensor.numel(); ++i) {
    EXPECT_LT(lod_data[i] - data[i], ACC_DIFF);
    EXPECT_GT(lod_data[i] - data[i], -ACC_DIFF);
  }
}

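// Runs the ResNet image-classification model on a batch of random pixels,
// first through TestInference to obtain a reference result and then through
// the native predictor API, and compares the two outputs element-wise.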
void MainImageClassification(const paddle::PaddlePlace& place) {
  int batch_size = 2;
  bool repeat = false;
  NativeConfig config = GetConfig();
  config.use_gpu = paddle::gpu_place_used(place);
  config.use_xpu = paddle::xpu_place_used(place);
  config.model_dir =
      FLAGS_book_dirname + "/image_classification_resnet.inference.model";

  const bool is_combined = false;
  std::vector<std::vector<int64_t>> feed_target_shapes =
      GetFeedTargetShapes(config.model_dir, is_combined);

  framework::LoDTensor input;
  // Use normalized image pixels as input data,
  // which should be in the range [0.0, 1.0].
  feed_target_shapes[0][0] = batch_size;
  framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);
  SetupTensor<float>(&input, input_dims, static_cast<float>(0),
                     static_cast<float>(1));
  std::vector<framework::LoDTensor*> cpu_feeds;
  cpu_feeds.push_back(&input);

  framework::FetchType output1;
  std::vector<framework::FetchType*> cpu_fetchs1;
  cpu_fetchs1.push_back(&output1);

  TestInference<platform::CPUPlace, false, true>(
      config.model_dir, cpu_feeds, cpu_fetchs1, repeat, is_combined);

  auto predictor = CreatePaddlePredictor(config);
  std::vector<PaddleTensor> paddle_tensor_feeds;
  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&input));

  std::vector<PaddleTensor> outputs;
  ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
  ASSERT_EQ(outputs.size(), 1UL);
  size_t len = outputs[0].data.length();
  float* data = static_cast<float*>(outputs[0].data.data());
  float* lod_data =
      BOOST_GET(paddle::framework::LoDTensor, output1).data<float>();
  for (size_t j = 0; j < len / sizeof(float); ++j) {
    EXPECT_NEAR(lod_data[j], data[j], ACC_DIFF);
  }
}

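// Concurrency test for word2vec: each thread creates its own predictor from
// the shared config and runs one pre-built job, then checks its output
// against the single-threaded reference result.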
void MainThreadsWord2Vec(const paddle::PaddlePlace& place) {
  NativeConfig config = GetConfig();
  config.use_gpu = paddle::gpu_place_used(place);
  config.use_xpu = paddle::xpu_place_used(place);
  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);

  // prepare input data and reference results
  constexpr int num_jobs = 3;
  std::vector<std::vector<framework::LoDTensor>> jobs(num_jobs);
  std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
  std::vector<framework::FetchType> refs(num_jobs);
  for (size_t i = 0; i < jobs.size(); ++i) {
    // each job has 4 words
    jobs[i].resize(4);
    for (size_t j = 0; j < 4; ++j) {
      framework::LoD lod{{0, 1}};
      int64_t dict_size = 2073;  // The size of the dictionary
      SetupLoDTensor(&jobs[i][j], lod, static_cast<int64_t>(0), dict_size - 1);
      paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i][j]));
    }

    // get reference result of each job
    std::vector<paddle::framework::LoDTensor*> ref_feeds;
    std::vector<paddle::framework::FetchType*> ref_fetches(1, &refs[i]);
    for (auto& word : jobs[i]) {
      ref_feeds.push_back(&word);
    }
    TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
  }

  // create threads; each thread runs one job
  std::vector<std::thread> threads;
  for (int tid = 0; tid < num_jobs; ++tid) {
    threads.emplace_back([&, tid]() {
      auto predictor = CreatePaddlePredictor(config);
      auto& local_inputs = paddle_tensor_feeds[tid];
      std::vector<PaddleTensor> local_outputs;
      ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));

      // check outputs range
      ASSERT_EQ(local_outputs.size(), 1UL);
      const size_t len = local_outputs[0].data.length();
      float* data = static_cast<float*>(local_outputs[0].data.data());
      for (size_t j = 0; j < len / sizeof(float); ++j) {
        ASSERT_LT(data[j], 1.0);
        ASSERT_GT(data[j], -1.0);
      }

      // check outputs correctness
      auto ref_tensor = BOOST_GET(paddle::framework::LoDTensor, refs[tid]);
      float* ref_data = ref_tensor.data<float>();
      EXPECT_EQ(ref_tensor.numel(), static_cast<int64_t>(len / sizeof(float)));
      for (int i = 0; i < ref_tensor.numel(); ++i) {
        EXPECT_NEAR(ref_data[i], data[i], 2e-3);
      }
    });
  }
  for (int i = 0; i < num_jobs; ++i) {
    threads[i].join();
  }
}

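// Concurrency test for image classification: one thread per job, each with
// its own predictor, checked against single-threaded reference results.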
void MainThreadsImageClassification(const paddle::PaddlePlace& place) {
  constexpr int num_jobs = 4;  // each job runs 1 batch
  constexpr int batch_size = 1;
  NativeConfig config = GetConfig();
  config.use_gpu = paddle::gpu_place_used(place);
  config.use_xpu = paddle::xpu_place_used(place);
  config.model_dir =
      FLAGS_book_dirname + "/image_classification_resnet.inference.model";

  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
  std::vector<framework::LoDTensor> jobs(num_jobs);
  std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
  std::vector<framework::FetchType> refs(num_jobs);
  for (size_t i = 0; i < jobs.size(); ++i) {
    // prepare inputs
    std::vector<std::vector<int64_t>> feed_target_shapes =
        GetFeedTargetShapes(config.model_dir, /*is_combined*/ false);
    feed_target_shapes[0][0] = batch_size;
    framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);
    SetupTensor<float>(&jobs[i], input_dims, 0.f, 1.f);
    paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i]));

    // get reference result of each job
    std::vector<framework::LoDTensor*> ref_feeds(1, &jobs[i]);
    std::vector<framework::FetchType*> ref_fetches(1, &refs[i]);
    TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
  }

  // create threads; each thread runs one job
  std::vector<std::thread> threads;
  for (int tid = 0; tid < num_jobs; ++tid) {
    threads.emplace_back([&, tid]() {
      auto predictor = CreatePaddlePredictor(config);
      auto& local_inputs = paddle_tensor_feeds[tid];
      std::vector<PaddleTensor> local_outputs;
      ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));

      // check outputs correctness
      ASSERT_EQ(local_outputs.size(), 1UL);
      const size_t len = local_outputs[0].data.length();
      float* data = static_cast<float*>(local_outputs[0].data.data());
      auto ref_tensor = BOOST_GET(paddle::framework::LoDTensor, refs[tid]);
      float* ref_data = ref_tensor.data<float>();
      EXPECT_EQ(static_cast<size_t>(ref_tensor.numel()), len / sizeof(float));
      for (int i = 0; i < ref_tensor.numel(); ++i) {
        EXPECT_NEAR(ref_data[i], data[i], ACC_DIFF);
      }
    });
  }
  for (int i = 0; i < num_jobs; ++i) {
    threads[i].join();
  }
}

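// Test registrations. CPU variants always run; XPU and GPU variants are
// compiled in only when the corresponding device support is enabled.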
TEST(inference_api_native, word2vec_cpu) {
  MainWord2Vec(paddle::PaddlePlace::kCPU);
}
TEST(inference_api_native, word2vec_cpu_threads) {
  MainThreadsWord2Vec(paddle::PaddlePlace::kCPU);
}
TEST(inference_api_native, image_classification_cpu) {
  MainImageClassification(paddle::PaddlePlace::kCPU);
}
TEST(inference_api_native, image_classification_cpu_threads) {
  MainThreadsImageClassification(paddle::PaddlePlace::kCPU);
}

#ifdef PADDLE_WITH_XPU
TEST(inference_api_native, word2vec_xpu) {
  MainWord2Vec(paddle::PaddlePlace::kXPU);
}
TEST(inference_api_native, image_classification_xpu) {
  MainImageClassification(paddle::PaddlePlace::kXPU);
}
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST(inference_api_native, word2vec_gpu) {
  MainWord2Vec(paddle::PaddlePlace::kGPU);
}
// Turn off temporarily for the unstable result.
// TEST(inference_api_native, word2vec_gpu_threads) {
//   MainThreadsWord2Vec(paddle::PaddlePlace::kGPU);
// }
TEST(inference_api_native, image_classification_gpu) {
  MainImageClassification(paddle::PaddlePlace::kGPU);
}
// Turn off temporarily for the unstable result.
// TEST(inference_api_native, image_classification_gpu_threads) {
//   MainThreadsImageClassification(paddle::PaddlePlace::kGPU);
// }
#endif

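// Deleting a pass through the PassBuilder must remove it from the pass list.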
TEST(PassBuilder, Delete) {
  AnalysisConfig config;
  config.DisableGpu();
  config.pass_builder()->DeletePass("attention_lstm_fuse_pass");
  const auto& passes = config.pass_builder()->AllPasses();
  auto it = std::find(passes.begin(), passes.end(), "attention_lstm_fuse_pass");
  ASSERT_EQ(it, passes.end());
}

}  // namespace paddle