diff --git a/paddle/fluid/inference/tests/book/CMakeLists.txt b/paddle/fluid/inference/tests/book/CMakeLists.txt
index dbb81462b8273bd701e9c9f530eaf69817abd6a1..2fa5a9540ba1311c7f87e6675a53044b23dd8276 100644
--- a/paddle/fluid/inference/tests/book/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/book/CMakeLists.txt
@@ -38,3 +38,11 @@ inference_test(recommender_system)
 #inference_test(rnn_encoder_decoder)
 #inference_test(understand_sentiment ARGS conv)
 inference_test(word2vec)
+
+# This is an ugly workaround to make this test run.
+# TODO(TJ): clean me up
+cc_test(test_inference_nlp
+        SRCS test_inference_nlp.cc
+        DEPS paddle_fluid
+        ARGS
+        --model_path=${PADDLE_BINARY_DIR}/python/paddle/fluid/tests/book/recognize_digits_mlp.inference.model)
diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
new file mode 100644
index 0000000000000000000000000000000000000000..70aa42ac4111c0524a55e26aaefa864338c1d6c1
--- /dev/null
+++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
@@ -0,0 +1,236 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <sys/time.h>
+#include <time.h>
+#include <fstream>
+#include <thread>  // NOLINT
+#include "gflags/gflags.h"
+#include "gtest/gtest.h"
+#include "paddle/fluid/inference/tests/test_helper.h"
+#ifdef PADDLE_WITH_MKLML
+#include <mkl_service.h>
+#include <omp.h>
+#endif
+
+DEFINE_string(model_path, "", "Directory of the inference model.");
+DEFINE_string(data_file, "", "File of input index data.");
+DEFINE_int32(repeat, 100, "Number of times to repeat the inference run.");
+DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference.");
+DEFINE_bool(prepare_vars, true, "Prepare variables before the executor runs.");
+DEFINE_int32(num_threads, 1, "Number of threads to use.");
+
+inline double GetCurrentMs() {
+  struct timeval time;
+  gettimeofday(&time, NULL);
+  return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec;
+}
+
+// This function just gives dummy data for the recognize_digits model.
+size_t DummyData(std::vector<paddle::framework::LoDTensor>* out) {
+  paddle::framework::LoDTensor input;
+  SetupTensor<float>(&input, {1, 1, 28, 28}, -1.f, 1.f);
+  out->emplace_back(input);
+  return 1;
+}
+
+// Load the input word-index data from file and save it into LoDTensors.
+// Return the total number of words.
+size_t LoadData(std::vector<paddle::framework::LoDTensor>* out,
+                const std::string& filename) {
+  if (filename.empty()) {
+    return DummyData(out);
+  }
+
+  size_t sz = 0;
+  std::fstream fin(filename);
+  std::string line;
+  out->clear();
+  while (getline(fin, line)) {
+    std::istringstream iss(line);
+    std::vector<int64_t> ids;
+    std::string field;
+    while (getline(iss, field, ' ')) {
+      ids.push_back(stoi(field));
+    }
+    if (ids.size() >= 1024) {
+      // Synced with the NLP team: inputs larger than 1024 words are ignored.
+      continue;
+    }
+
+    paddle::framework::LoDTensor words;
+    paddle::framework::LoD lod{{0, ids.size()}};
+    words.set_lod(lod);
+    int64_t* pdata = words.mutable_data<int64_t>(
+        {static_cast<int64_t>(ids.size()), 1}, paddle::platform::CPUPlace());
+    memcpy(pdata, ids.data(), words.numel() * sizeof(int64_t));
+    out->emplace_back(words);
+    sz += ids.size();
+  }
+  return sz;
+}
+
+// Split the input data samples into smaller jobs, balanced as evenly as
+// possible across the number of threads.
+void SplitData(
+    const std::vector<paddle::framework::LoDTensor>& datasets,
+    std::vector<std::vector<const paddle::framework::LoDTensor*>>* jobs,
+    const int num_threads) {
+  size_t s = 0;
+  jobs->resize(num_threads);
+  while (s < datasets.size()) {
+    for (auto it = jobs->begin(); it != jobs->end(); it++) {
+      it->emplace_back(&datasets[s]);
+      s++;
+      if (s >= datasets.size()) {
+        break;
+      }
+    }
+  }
+}
+
+void ThreadRunInfer(
+    const int tid, paddle::framework::Executor* executor,
+    paddle::framework::Scope* scope,
+    const std::unique_ptr<paddle::framework::ProgramDesc>& inference_program,
+    const std::vector<std::vector<const paddle::framework::LoDTensor*>>& jobs) {
+  auto copy_program = std::unique_ptr<paddle::framework::ProgramDesc>(
+      new paddle::framework::ProgramDesc(*inference_program));
+  auto& sub_scope = scope->NewScope();
+
+  std::string feed_holder_name = "feed_" + paddle::string::to_string(tid);
+  std::string fetch_holder_name = "fetch_" + paddle::string::to_string(tid);
+  copy_program->SetFeedHolderName(feed_holder_name);
+  copy_program->SetFetchHolderName(fetch_holder_name);
+
+  const std::vector<std::string>& feed_target_names =
+      copy_program->GetFeedTargetNames();
+  const std::vector<std::string>& fetch_target_names =
+      copy_program->GetFetchTargetNames();
+
+  PADDLE_ENFORCE_EQ(fetch_target_names.size(), 1UL);
+  std::map<std::string, paddle::framework::LoDTensor*> fetch_targets;
+  paddle::framework::LoDTensor outtensor;
+  fetch_targets[fetch_target_names[0]] = &outtensor;
+
+  std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
+  PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL);
+
+  auto& inputs = jobs[tid];
+  auto start_ms = GetCurrentMs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    feed_targets[feed_target_names[0]] = inputs[i];
+    executor->Run(*copy_program, &sub_scope, &feed_targets, &fetch_targets,
+                  true /*create_local_scope*/, true /*create_vars*/,
+                  feed_holder_name, fetch_holder_name);
+  }
+  auto stop_ms = GetCurrentMs();
+  scope->DeleteScope(&sub_scope);
+  LOG(INFO) << "Tid: " << tid << ", processed " << inputs.size()
+            << " samples, avg time per sample: "
+            << (stop_ms - start_ms) / inputs.size() << " ms";
+}
+
+TEST(inference, nlp) {
+  if (FLAGS_model_path.empty()) {
+    LOG(FATAL) << "Usage: ./example --model_path=path/to/your/model";
+  }
+  if (FLAGS_data_file.empty()) {
+    LOG(WARNING) << "No data file provided, will use dummy data! "
+                 << "Note: if you use an NLP model, please provide a data file.";
+  }
+  LOG(INFO) << "Model Path: " << FLAGS_model_path;
+  LOG(INFO) << "Data File: " << FLAGS_data_file;
+
+  std::vector<paddle::framework::LoDTensor> datasets;
+  size_t num_total_words = LoadData(&datasets, FLAGS_data_file);
+  LOG(INFO) << "Number of samples (seq_len < 1024): " << datasets.size();
+  LOG(INFO) << "Total number of words: " << num_total_words;
+
+  const bool model_combined = false;
+  // 0. Call `paddle::framework::InitDevices()` to initialize all the devices
+  // 1. Define place, executor, scope
+  auto place = paddle::platform::CPUPlace();
+  auto executor = paddle::framework::Executor(place);
+  std::unique_ptr<paddle::framework::Scope> scope(
+      new paddle::framework::Scope());
+
+  // 2. Initialize the inference_program and load parameters
+  std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
+  inference_program =
+      InitProgram(&executor, scope.get(), FLAGS_model_path, model_combined);
+  if (FLAGS_use_mkldnn) {
+    EnableMKLDNN(inference_program);
+  }
+
+#ifdef PADDLE_WITH_MKLML
+  // Use only one OpenMP/MKL thread within each std::thread.
+  omp_set_dynamic(0);
+  omp_set_num_threads(1);
+  mkl_set_num_threads(1);
+#endif
+
+  double start_ms = 0, stop_ms = 0;
+  if (FLAGS_num_threads > 1) {
+    std::vector<std::vector<const paddle::framework::LoDTensor*>> jobs;
+    SplitData(datasets, &jobs, FLAGS_num_threads);
+    std::vector<std::unique_ptr<std::thread>> threads;
+    start_ms = GetCurrentMs();
+    for (int i = 0; i < FLAGS_num_threads; ++i) {
+      threads.emplace_back(
+          new std::thread(ThreadRunInfer, i, &executor, scope.get(),
+                          std::ref(inference_program), std::ref(jobs)));
+    }
+    for (int i = 0; i < FLAGS_num_threads; ++i) {
+      threads[i]->join();
+    }
+    stop_ms = GetCurrentMs();
+  } else {
+    if (FLAGS_prepare_vars) {
+      executor.CreateVariables(*inference_program, scope.get(), 0);
+    }
+    // always prepare context
+    std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx;
+    ctx = executor.Prepare(*inference_program, 0);
+
+    // prepare fetch
+    const std::vector<std::string>& fetch_target_names =
+        inference_program->GetFetchTargetNames();
+    PADDLE_ENFORCE_EQ(fetch_target_names.size(), 1UL);
+    std::map<std::string, paddle::framework::LoDTensor*> fetch_targets;
+    paddle::framework::LoDTensor outtensor;
+    fetch_targets[fetch_target_names[0]] = &outtensor;
+
+    // prepare feed
+    const std::vector<std::string>& feed_target_names =
+        inference_program->GetFeedTargetNames();
+    PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL);
+    std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
+
+    // feed data and run
+    start_ms = GetCurrentMs();
+    for (size_t i = 0; i < datasets.size(); ++i) {
+      feed_targets[feed_target_names[0]] = &(datasets[i]);
+      executor.RunPreparedContext(ctx.get(), scope.get(), &feed_targets,
+                                  &fetch_targets, !FLAGS_prepare_vars);
+    }
+    stop_ms = GetCurrentMs();
+    LOG(INFO) << "Tid: 0, processed " << datasets.size()
+              << " samples, avg time per sample: "
+              << (stop_ms - start_ms) / datasets.size() << " ms";
+  }
+  LOG(INFO) << "Total inference time with " << FLAGS_num_threads
+            << " threads: " << (stop_ms - start_ms) / 1000.0
+            << " sec, QPS: " << datasets.size() / ((stop_ms - start_ms) / 1000);
+}
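
Note (not part of the diff): LoadData above reads --data_file line by line, splits each line on single spaces, and converts every field with stoi, so the expected input is one sample per line of space-separated integer word indices with no trailing space. The sketch below, which is an illustration rather than anything shipped in this change, writes such a file; the file name and index values are made up.

// Minimal sketch (assumption, not from the PR): generate a tiny index file
// in the format LoadData parses -- one sample per line, word ids separated
// by single spaces, no trailing space at the end of a line.
#include <fstream>
#include <vector>

int main() {
  std::ofstream fout("dummy_word_ids.txt");  // hypothetical file name
  std::vector<std::vector<int>> samples = {
      {12, 845, 93, 4},        // sample 0: 4 words
      {7, 7, 201, 19, 5, 88},  // sample 1: 6 words
  };
  for (const auto& ids : samples) {
    for (size_t i = 0; i < ids.size(); ++i) {
      fout << ids[i] << (i + 1 == ids.size() ? '\n' : ' ');
    }
  }
  return 0;
}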