// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sys/stat.h>  // mkdir(), used by the save_optim_model test

#include "paddle/fluid/inference/tests/api/tester_helper.h"
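
// Analyzer tests for the DAM (Deep Attention Matching) model. Shared helpers
// (split_to_int64, TensorAssignData, TestPrediction, CompareNativeAndAnalysis,
// ...) and common flags such as --infer_model, --infer_data, and --batch_size
// come from tester_helper.h.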

DEFINE_int32(max_turn_num, 9,
             "The max turn number: 1 for the small and 9 for the normal.");

namespace paddle {
namespace inference {

// Fixed sequence length of each response (and turn) in the test data.
constexpr int32_t kMaxTurnLen = 50;

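// Reference scores, one per sample, parsed from the last field of each data
// line in DataRecord::Load(); profile() checks predictor output against them.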
static std::vector<float> result_data;

struct DataRecord {
  // One owning slot per turn (FLAGS_max_turn_num entries); std::vector
  // instead of raw new[] arrays makes returning DataRecord by value safe.
  std::vector<std::vector<std::vector<int64_t>>> turns;     // turns data
  std::vector<std::vector<std::vector<float>>> turns_mask;  // turns mask data
  std::vector<std::vector<int64_t>> response;     // response data : 1
  std::vector<std::vector<float>> response_mask;  // response mask data : 1
  size_t batch_iter{0};
  size_t batch_size{1};
  size_t num_samples{0};  // total number of samples

  DataRecord() {
    turns.resize(FLAGS_max_turn_num);
    turns_mask.resize(FLAGS_max_turn_num);
  }

  explicit DataRecord(const std::string &path, int batch_size = 1)
      : DataRecord() {
    this->batch_size = batch_size;
    Load(path);
  }

  DataRecord NextBatch() {
    DataRecord data;
    size_t batch_end = batch_iter + batch_size;
    // NOTE: Skip the final batch if there is not enough data left.
    if (batch_end <= response.size()) {
      for (int i = 0; i < FLAGS_max_turn_num; ++i) {
        data.turns[i].assign(turns[i].begin() + batch_iter,
                             turns[i].begin() + batch_end);
      }
      for (int i = 0; i < FLAGS_max_turn_num; ++i) {
        data.turns_mask[i].assign(turns_mask[i].begin() + batch_iter,
                                  turns_mask[i].begin() + batch_end);
      }
      data.response.assign(response.begin() + batch_iter,
                           response.begin() + batch_end);
      data.response_mask.assign(response_mask.begin() + batch_iter,
                                response_mask.begin() + batch_end);
      CHECK(!data.response.empty());
      CHECK(!data.response_mask.empty());
      CHECK_EQ(data.response.size(), data.response_mask.size());
    }
    batch_iter += batch_size;
    return data;
  }

  // Parses the data file. Each line holds 2 * FLAGS_max_turn_num + 3
  // comma-separated fields: the turns, the turn masks, the response, the
  // response mask, and the reference score.
  void Load(const std::string &path) {
    std::ifstream file(path);
    std::string line;
    size_t num_lines = 0;
    result_data.clear();
    while (std::getline(file, line)) {
      num_lines++;
      std::vector<std::string> data;
      split(line, ',', &data);
      CHECK_EQ(data.size(), static_cast<size_t>(2 * FLAGS_max_turn_num + 3));
      // load turn data
      // std::vector instead of a variable-length array (a GCC extension).
      std::vector<std::vector<int64_t>> turns_tmp(FLAGS_max_turn_num);
      for (int i = 0; i < FLAGS_max_turn_num; ++i) {
        split_to_int64(data[i], ' ', &turns_tmp[i]);
        turns[i].push_back(std::move(turns_tmp[i]));
      }
      // load turn_mask data
      std::vector<std::vector<float>> turns_mask_tmp(FLAGS_max_turn_num);
      for (int i = 0; i < FLAGS_max_turn_num; ++i) {
        split_to_float(data[FLAGS_max_turn_num + i], ' ', &turns_mask_tmp[i]);
        turns_mask[i].push_back(std::move(turns_mask_tmp[i]));
      }
      // load response data
      std::vector<int64_t> response_tmp;
      split_to_int64(data[2 * FLAGS_max_turn_num], ' ', &response_tmp);
      response.push_back(std::move(response_tmp));
      // load response_mask data
      std::vector<float> response_mask_tmp;
      split_to_float(data[2 * FLAGS_max_turn_num + 1], ' ', &response_mask_tmp);
      response_mask.push_back(std::move(response_mask_tmp));
      // load result data
      float result_tmp = std::stof(data[2 * FLAGS_max_turn_num + 2]);
      result_data.push_back(result_tmp);
    }
    num_samples = num_lines;
  }
};

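// Packs one batch into the feed order the DAM model expects: turn_0 ..
// turn_{max_turn_num-1}, then the matching turn masks, then response and
// response_mask.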
void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
                   int batch_size) {
  std::vector<PaddleTensor> turns_tensor(FLAGS_max_turn_num);
  std::vector<PaddleTensor> turns_mask_tensor(FLAGS_max_turn_num);
  PaddleTensor response_tensor;
  PaddleTensor response_mask_tensor;
  std::string turn_pre = "turn_";
  std::string turn_mask_pre = "turn_mask_";

  auto one_batch = data->NextBatch();
  PADDLE_ENFORCE(!one_batch.response.empty());
  int size = one_batch.response[0].size();
  CHECK_EQ(size, kMaxTurnLen);
  // turn tensor assignment
  for (int i = 0; i < FLAGS_max_turn_num; ++i) {
    turns_tensor[i].name = turn_pre + std::to_string(i);
    turns_tensor[i].shape.assign({batch_size, size, 1});
    turns_tensor[i].dtype = PaddleDType::INT64;
    TensorAssignData<int64_t>(&turns_tensor[i], one_batch.turns[i]);
  }
  // turn mask tensor assignment
  for (int i = 0; i < FLAGS_max_turn_num; ++i) {
    turns_mask_tensor[i].name = turn_mask_pre + std::to_string(i);
    turns_mask_tensor[i].shape.assign({batch_size, size, 1});
    turns_mask_tensor[i].dtype = PaddleDType::FLOAT32;
    TensorAssignData<float>(&turns_mask_tensor[i], one_batch.turns_mask[i]);
  }
  // response tensor assignment
  response_tensor.name = "response";
  response_tensor.shape.assign({batch_size, size, 1});
  response_tensor.dtype = PaddleDType::INT64;
  TensorAssignData<int64_t>(&response_tensor, one_batch.response);
  // response mask tensor assignment
  response_mask_tensor.name = "response_mask";
  response_mask_tensor.shape.assign({batch_size, size, 1});
  response_mask_tensor.dtype = PaddleDType::FLOAT32;
  TensorAssignData<float>(&response_mask_tensor, one_batch.response_mask);

  // Set inputs.
  for (int i = 0; i < FLAGS_max_turn_num; ++i) {
    input_slots->push_back(std::move(turns_tensor[i]));
  }
  for (int i = 0; i < FLAGS_max_turn_num; ++i) {
    input_slots->push_back(std::move(turns_mask_tensor[i]));
  }
  input_slots->push_back(std::move(response_tensor));
  input_slots->push_back(std::move(response_mask_tensor));
}

void SetConfig(AnalysisConfig *cfg) {
  cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
  cfg->SwitchSpecifyInputNames();
  cfg->SwitchIrOptim(true);
}

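// Points cfg at the model written by the save_optim_model test below, so
// compare_optim_orig assumes save_optim_model has already run (gtest runs
// tests in declaration order by default).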
void SetOptimConfig(AnalysisConfig *cfg) {
  std::string optim_model_path = FLAGS_infer_model + "/saved_optim_model";
  cfg->SetModel(optim_model_path + "/model", optim_model_path + "/params");
  cfg->SwitchIrOptim(true);
  cfg->SwitchSpecifyInputNames();
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
  std::vector<PaddleTensor> input_slots;
  int test_batch_num =
      FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
  LOG(INFO) << "The number of samples to be tested: "
            << test_batch_num * FLAGS_batch_size;
  for (int bid = 0; bid < test_batch_num; ++bid) {
    input_slots.clear();
    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
    inputs->emplace_back(input_slots);
  }
}

// Kept as a standalone function so profiling is easy to run independently.
void profile(bool use_mkldnn = false) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  if (use_mkldnn) {
    cfg.EnableMKLDNN();
    // Enable all MKL-DNN-supported ops in DAM except conv3d.
    std::unordered_set<std::string> op_list = {"softmax", "elementwise_add",
                                               "relu"};
    cfg.SetMKLDNNOp(op_list);
  }

  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                 input_slots_all, &outputs, FLAGS_num_threads);

  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
    PADDLE_ENFORCE_GT(outputs.size(), 0);
    auto output = outputs.back();
    PADDLE_ENFORCE_GT(output.size(), 0);
    size_t size = GetSize(output[0]);
    PADDLE_ENFORCE_GT(size, 0);
    float *result = static_cast<float *>(output[0].data.data());
    for (size_t i = 0; i < size; i++) {
      EXPECT_NEAR(result[i], result_data[i], 1e-3);
    }
  }
}

TEST(Analyzer_dam, profile) { profile(); }
#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_dam, profile_mkldnn) { profile(true /* use_mkldnn */); }
#endif

// Check the fuse status
TEST(Analyzer_dam, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  int num_ops;
  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
  auto fuse_statis = GetFuseStatis(
      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
}

// Compare result of NativeConfig and AnalysisConfig
void compare(bool use_mkldnn = false) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  if (use_mkldnn) {
    cfg.EnableMKLDNN();
    // Enable all MKL-DNN-supported ops in DAM except conv3d.
    std::unordered_set<std::string> op_list = {"softmax", "elementwise_add",
                                               "relu"};
    cfg.SetMKLDNNOp(op_list);
  }

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  CompareNativeAndAnalysis(
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
}

// Compare result of NativeConfig and AnalysisConfig with memory optimization.
TEST(Analyzer_dam, compare_with_static_memory_optim) {
  // The small DAM model cores in CI (though it works locally), so only the
  // normal model is exercised here.
  if (FLAGS_max_turn_num == 9) {
    AnalysisConfig cfg, cfg1;
    DataRecord data(FLAGS_infer_data, FLAGS_batch_size);

    std::vector<std::vector<PaddleTensor>> input_slots_all;
    SetInput(&input_slots_all);
    // Run the first time to force an update of the memory cache.
    SetConfig(&cfg);
    cfg.EnableMemoryOptim(true, true /*force update*/);

    CompareNativeAndAnalysis(
        reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
        input_slots_all);

    // Run a second time to use the memory cache and perform memory
    // optimization.
    SetConfig(&cfg1);
    cfg1.EnableMemoryOptim(true, false /*do not force update*/);

    CompareNativeAndAnalysis(
        reinterpret_cast<const PaddlePredictor::Config *>(&cfg1),
        input_slots_all);
  }
}

TEST(Analyzer_dam, compare_with_dynamic_memory_optim) {
  // The small DAM model cores in CI (though it works locally), so only the
  // normal model is exercised here.
  if (FLAGS_max_turn_num == 9) {
    AnalysisConfig cfg, cfg1;
    DataRecord data(FLAGS_infer_data, FLAGS_batch_size);

    std::vector<std::vector<PaddleTensor>> input_slots_all;
    SetInput(&input_slots_all);
    // Run the first time to force an update of the memory cache.
    SetConfig(&cfg);
    cfg.EnableMemoryOptim();

    CompareNativeAndAnalysis(
        reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
        input_slots_all);
  }
}

TEST(Analyzer_dam, compare) { compare(); }

#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_dam, compare_mkldnn) { compare(true /* use_mkldnn */); }
#endif

// Compare Deterministic result
TEST(Analyzer_dam, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                       input_slots_all);
}

// Save optim model
TEST(Analyzer_dam, save_optim_model) {
  AnalysisConfig cfg;
  std::string optim_model_path = FLAGS_infer_model + "/saved_optim_model";
  mkdir(optim_model_path.c_str(), 0777);
  SetConfig(&cfg);
  SaveOptimModel(&cfg, optim_model_path);
}

void CompareOptimAndOrig(const PaddlePredictor::Config *orig_config,
                         const PaddlePredictor::Config *optim_config,
                         const std::vector<std::vector<PaddleTensor>> &inputs) {
  PrintConfig(orig_config, true);
  PrintConfig(optim_config, true);
  std::vector<std::vector<PaddleTensor>> orig_outputs, optim_outputs;
  TestOneThreadPrediction(orig_config, inputs, &orig_outputs, false);
  TestOneThreadPrediction(optim_config, inputs, &optim_outputs, false);
  CompareResult(orig_outputs.back(), optim_outputs.back());
}

TEST(Analyzer_dam, compare_optim_orig) {
  AnalysisConfig orig_cfg;
  AnalysisConfig optim_cfg;
  SetConfig(&orig_cfg);
  SetOptimConfig(&optim_cfg);
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareOptimAndOrig(
      reinterpret_cast<const PaddlePredictor::Config *>(&orig_cfg),
      reinterpret_cast<const PaddlePredictor::Config *>(&optim_cfg),
      input_slots_all);
}

}  // namespace inference
}  // namespace paddle