diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
index c942b43f174895d1bfa9688bb6d651f440b9bf41..ca02e38ede7a4b4be2ebe2602fbb720b79aaa741 100644
--- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc
+++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include <time.h>
 #include <cstring>
+#include <thread>  // NOLINT
 #include "gflags/gflags.h"
 #include "gtest/gtest.h"
 #include "paddle/fluid/inference/tests/test_helper.h"
@@ -31,76 +32,78 @@ TEST(inference, understand_sentiment) {
   LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
   std::string dirname = FLAGS_dirname;
-
-  // 0. Call `paddle::framework::InitDevices()` initialize all the devices
-  // In unittests, this is done in paddle/testing/paddle_gtest_main.cc
-  paddle::framework::LoDTensor words;
-  /*
-  paddle::framework::LoD lod{{0, 83}};
-  int64_t word_dict_len = 198392;
-  SetupLoDTensor(&words, lod, static_cast<int64_t>(0),
-                 static_cast<int64_t>(word_dict_len - 1));
-  */
-  std::vector<int64_t> srcdata{
-      784, 784, 1550, 6463, 56, 75693, 6189, 784, 784, 1550,
-      198391, 6463, 42468, 4376, 10251, 10760, 6189, 297, 396, 6463,
-      6463, 1550, 198391, 6463, 22564, 1612, 291, 68, 164, 784,
-      784, 1550, 198391, 6463, 13659, 3362, 42468, 6189, 2209, 198391,
-      6463, 2209, 2209, 198391, 6463, 2209, 1062, 3029, 1831, 3029,
-      1065, 2281, 100, 11216, 1110, 56, 10869, 9811, 100, 198391,
-      6463, 100, 9280, 100, 288, 40031, 1680, 1335, 100, 1550,
-      9280, 7265, 244, 1550, 198391, 6463, 1550, 198391, 6463, 42468,
-      4376, 10251, 10760};
-  paddle::framework::LoD lod{{0, srcdata.size()}};
-  words.set_lod(lod);
-  int64_t* pdata = words.mutable_data<int64_t>(
-      {static_cast<int64_t>(srcdata.size()), 1}, paddle::platform::CPUPlace());
-  memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t));
-
-  LOG(INFO) << "number of input size:" << words.numel();
-  std::vector<paddle::framework::LoDTensor*> cpu_feeds;
-  cpu_feeds.push_back(&words);
-
-  paddle::framework::LoDTensor output1;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
-  cpu_fetchs1.push_back(&output1);
-
-  // Run inference on CPU
   const bool model_combined = false;
-  if (FLAGS_prepare_vars) {
-    if (FLAGS_prepare_context) {
-      TestInference<paddle::platform::CPUPlace, false, true>(
-          dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
-          FLAGS_use_mkldnn);
-    } else {
-      TestInference<paddle::platform::CPUPlace, false, false>(
-          dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
-          FLAGS_use_mkldnn);
-    }
-  } else {
-    if (FLAGS_prepare_context) {
-      TestInference<paddle::platform::CPUPlace, true, true>(
-          dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
-          FLAGS_use_mkldnn);
-    } else {
-      TestInference<paddle::platform::CPUPlace, true, false>(
-          dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
-          FLAGS_use_mkldnn);
-    }
-  }
-  LOG(INFO) << output1.lod();
-  LOG(INFO) << output1.dims();
-
-#ifdef PADDLE_WITH_CUDA
-  paddle::framework::LoDTensor output2;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
-  cpu_fetchs2.push_back(&output2);
-
-  // Run inference on CUDA GPU
-  TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
-  LOG(INFO) << output2.lod();
-  LOG(INFO) << output2.dims();
-
-  CheckError(output1, output2);
-#endif
-
+  int total_work = 100;
+  int num_threads = 10;
+  int work_per_thread = total_work / num_threads;
+  std::vector<std::unique_ptr<std::thread>> infer_threads;
+  for (int i = 0; i < num_threads; ++i) {
+    infer_threads.emplace_back(new std::thread([&, i]() {
+      for (int j = 0; j < work_per_thread; ++j) {
+        // 0. Call `paddle::framework::InitDevices()` to initialize devices.
+        // In unittests, this is done in paddle/testing/paddle_gtest_main.cc
+        paddle::framework::LoDTensor words;
+        /*
+        paddle::framework::LoD lod{{0, 83}};
+        int64_t word_dict_len = 198392;
+        SetupLoDTensor(&words, lod, static_cast<int64_t>(0),
+                       static_cast<int64_t>(word_dict_len - 1));
+        */
+        std::vector<int64_t> srcdata{
+            784, 784, 1550, 6463, 56, 75693, 6189, 784, 784,
+            1550, 198391, 6463, 42468, 4376, 10251, 10760, 6189, 297,
+            396, 6463, 6463, 1550, 198391, 6463, 22564, 1612, 291,
+            68, 164, 784, 784, 1550, 198391, 6463, 13659, 3362,
+            42468, 6189, 2209, 198391, 6463, 2209, 2209, 198391, 6463,
+            2209, 1062, 3029, 1831, 3029, 1065, 2281, 100, 11216,
+            1110, 56, 10869, 9811, 100, 198391, 6463, 100, 9280,
+            100, 288, 40031, 1680, 1335, 100, 1550, 9280, 7265,
+            244, 1550, 198391, 6463, 1550, 198391, 6463, 42468, 4376,
+            10251, 10760};
+        paddle::framework::LoD lod{{0, srcdata.size()}};
+        words.set_lod(lod);
+        int64_t* pdata = words.mutable_data<int64_t>(
+            {static_cast<int64_t>(srcdata.size()), 1},
+            paddle::platform::CPUPlace());
+        memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t));
+
+        LOG(INFO) << "number of input words: " << words.numel();
+        std::vector<paddle::framework::LoDTensor*> cpu_feeds;
+        cpu_feeds.push_back(&words);
+
+        paddle::framework::LoDTensor output1;
+        std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+        cpu_fetchs1.push_back(&output1);
+
+        // Run inference on CPU
+        if (FLAGS_prepare_vars) {
+          if (FLAGS_prepare_context) {
+            TestInference<paddle::platform::CPUPlace, false, true>(
+                dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
+                FLAGS_use_mkldnn);
+          } else {
+            TestInference<paddle::platform::CPUPlace, false, false>(
+                dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
+                FLAGS_use_mkldnn);
+          }
+        } else {
+          if (FLAGS_prepare_context) {
+            TestInference<paddle::platform::CPUPlace, true, true>(
+                dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
+                FLAGS_use_mkldnn);
+          } else {
+            TestInference<paddle::platform::CPUPlace, true, false>(
+                dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
+                FLAGS_use_mkldnn);
+          }
+        }
+        LOG(INFO) << output1.lod();
+        LOG(INFO) << output1.dims();
+      }
+    }));
+  }
+  // Every std::thread must be joined before it is destroyed; without this
+  // loop the vector's destruction would call std::terminate().
+  for (int i = 0; i < num_threads; ++i) {
+    infer_threads[i]->join();
+  }
 }
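
For reviewers unfamiliar with the fan-out structure this change introduces, here is a minimal standalone sketch of the same pattern, stripped of the Paddle-specific setup. `RunOneInference` is a hypothetical stand-in for the per-iteration body of the test, not a Paddle API:

```cpp
#include <memory>
#include <thread>
#include <vector>

// Hypothetical placeholder for one inference pass (the body of the
// inner loop in the test above).
void RunOneInference(int thread_id, int iteration) { /* ... */ }

int main() {
  const int total_work = 100;
  const int num_threads = 10;
  // Integer division: work is dropped if total_work is not a multiple of
  // num_threads (100 / 10 = 10, so here nothing is lost).
  const int work_per_thread = total_work / num_threads;

  std::vector<std::unique_ptr<std::thread>> threads;
  for (int i = 0; i < num_threads; ++i) {
    threads.emplace_back(new std::thread([=]() {
      for (int j = 0; j < work_per_thread; ++j) {
        RunOneInference(i, j);
      }
    }));
  }
  // Join before the vector (and the std::thread objects it owns) is
  // destroyed; destroying a joinable std::thread calls std::terminate().
  for (auto& t : threads) {
    t->join();
  }
  return 0;
}
```

Capturing the loop index by value (`[&, i]` in the diff, `[=]` here) matters: capturing `i` by reference would race with the loop that keeps incrementing it.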
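One more note on the input setup: the `LoD` ("level of detail") built as `{{0, srcdata.size()}}` is Paddle's descriptor for variable-length sequences packed into one flat tensor. It is a list of cumulative offsets, so `{0, 83}` marks the whole 83-word buffer as a single sequence. A small sketch of how such offsets relate to sequence lengths; `BuildLoD` is an illustrative helper, not part of Paddle:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Turn per-sequence lengths into cumulative LoD offsets: entry k is where
// sequence k starts in the flat buffer, and the last entry is the total size.
std::vector<size_t> BuildLoD(const std::vector<size_t>& seq_lens) {
  std::vector<size_t> offsets{0};
  for (size_t len : seq_lens) offsets.push_back(offsets.back() + len);
  return offsets;
}

int main() {
  // Two sequences of lengths 3 and 5 -> offsets {0, 3, 8}; the test's
  // single 83-word input corresponds to {0, 83}.
  for (size_t off : BuildLoD({3, 5})) std::cout << off << " ";
  std::cout << "\n";
  return 0;
}
```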