diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc
index 98780b6881caa96c0356e07a0a3bd040f28efba0..65db7c7b5008dcb301e741ec17c3623715e10bb8 100644
--- a/paddle/fluid/inference/io.cc
+++ b/paddle/fluid/inference/io.cc
@@ -117,7 +117,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor,
   std::string program_desc_str;
   VLOG(3) << "loading model from " << model_filename;
   ReadBinaryFile(model_filename, &program_desc_str);
-  // LOG(INFO) << program_desc_str;
+
   std::unique_ptr<framework::ProgramDesc> main_program(
       new framework::ProgramDesc(program_desc_str));
diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
index fba64efece8b4782dc4566b62949aea4ac74f323..962358d761c3ff4c5c14f0c484cdf465e3d20cfd 100644
--- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc
+++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
@@ -24,23 +24,22 @@ limitations under the License. */
 #include <omp.h>
 #endif
 
-DEFINE_string(dirname, "", "Directory of the inference model.");
+DEFINE_string(modelpath, "", "Directory of the inference model.");
+DEFINE_string(datafile, "", "File of input index data.");
 DEFINE_int32(repeat, 100, "Running the inference program repeat times");
 DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference");
 DEFINE_bool(prepare_vars, true, "Prepare variables before executor");
-DEFINE_bool(prepare_context, true, "Prepare Context before executor");
-
 DEFINE_int32(num_threads, 1, "Number of threads should be used");
 
-inline double get_current_ms() {
+inline double GetCurrentMs() {
   struct timeval time;
   gettimeofday(&time, NULL);
   return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec;
 }
 
 // return size of total words
-size_t read_datasets(std::vector<paddle::framework::LoDTensor>* out,
-                     const std::string& filename) {
+size_t LoadData(std::vector<paddle::framework::LoDTensor>* out,
+                const std::string& filename) {
   size_t sz = 0;
   std::fstream fin(filename);
   std::string line;
@@ -68,6 +67,23 @@ size_t read_datasets(std::vector<paddle::framework::LoDTensor>* out,
   return sz;
 }
 
+void SplitData(
+    const std::vector<paddle::framework::LoDTensor>& datasets,
+    std::vector<std::vector<const paddle::framework::LoDTensor*>>* jobs,
+    const int num_threads) {
+  size_t s = 0;
+  jobs->resize(num_threads);
+  while (s < datasets.size()) {
+    for (auto it = jobs->begin(); it != jobs->end(); it++) {
+      it->emplace_back(&datasets[s]);
+      s++;
+      if (s >= datasets.size()) {
+        break;
+      }
+    }
+  }
+}
+
 void ThreadRunInfer(
     const int tid, paddle::framework::Executor* executor,
     paddle::framework::Scope* scope,
@@ -80,7 +96,6 @@ void ThreadRunInfer(
   copy_program->SetFeedHolderName(feed_holder_name);
   copy_program->SetFetchHolderName(fetch_holder_name);
 
-  // 3. Get the feed_target_names and fetch_target_names
   const std::vector<std::string>& feed_target_names =
       copy_program->GetFeedTargetNames();
   const std::vector<std::string>& fetch_target_names =
@@ -95,51 +110,32 @@ void ThreadRunInfer(
   PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL);
 
   auto& inputs = jobs[tid];
-  auto start_ms = get_current_ms();
+  auto start_ms = GetCurrentMs();
   for (size_t i = 0; i < inputs.size(); ++i) {
     feed_targets[feed_target_names[0]] = inputs[i];
     executor->Run(*copy_program, scope, &feed_targets, &fetch_targets, true,
                   true, feed_holder_name, fetch_holder_name);
   }
-  auto stop_ms = get_current_ms();
+  auto stop_ms = GetCurrentMs();
   LOG(INFO) << "Tid: " << tid << ", process " << inputs.size()
             << " samples, avg time per sample: "
-            << (stop_ms - start_ms) / inputs.size() << " ms";
 }
 
-void bcast_datasets(
-    const std::vector<paddle::framework::LoDTensor>& datasets,
-    std::vector<std::vector<const paddle::framework::LoDTensor*>>* jobs,
-    const int num_threads) {
-  size_t s = 0;
-  jobs->resize(num_threads);
-  while (s < datasets.size()) {
-    for (auto it = jobs->begin(); it != jobs->end(); it++) {
-      it->emplace_back(&datasets[s]);
-      s++;
-      if (s >= datasets.size()) {
-        break;
-      }
-    }
-  }
-}
-
 TEST(inference, nlp) {
-  if (FLAGS_dirname.empty()) {
-    LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
+  if (FLAGS_modelpath.empty() || FLAGS_datafile.empty()) {
+    LOG(FATAL) << "Usage: ./example --modelpath=path/to/your/model "
+               << "--datafile=path/to/your/data";
   }
-  LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
-  std::string dirname = FLAGS_dirname;
+  LOG(INFO) << "Model Path: " << FLAGS_modelpath;
+  LOG(INFO) << "Data File: " << FLAGS_datafile;
 
   std::vector<paddle::framework::LoDTensor> datasets;
-  size_t num_total_words =
-      read_datasets(&datasets, "/home/tangjian/paddle-tj/out.ids.txt");
-  LOG(INFO) << "Number of dataset samples(seq len<1024): " << datasets.size();
+  size_t num_total_words = LoadData(&datasets, FLAGS_datafile);
+  LOG(INFO) << "Number of samples (seq_len<1024): " << datasets.size();
   LOG(INFO) << "Total number of words: " << num_total_words;
 
   const bool model_combined = false;
-  // 0. Call `paddle::framework::InitDevices()` initialize all the devices
   // 1. Define place, executor, scope
   auto place = paddle::platform::CPUPlace();
@@ -148,13 +144,14 @@ TEST(inference, nlp) {
 
   // 2. Initialize the inference_program and load parameters
   std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
-  inference_program = InitProgram(&executor, scope, dirname, model_combined);
+  inference_program =
+      InitProgram(&executor, scope, FLAGS_modelpath, model_combined);
   if (FLAGS_use_mkldnn) {
     EnableMKLDNN(inference_program);
   }
 
 #ifdef PADDLE_WITH_MKLML
-  // only use 1 core per thread
+  // only use 1 thread number per std::thread
   omp_set_dynamic(0);
   omp_set_num_threads(1);
   mkl_set_num_threads(1);
@@ -163,24 +160,23 @@
   double start_ms = 0, stop_ms = 0;
   if (FLAGS_num_threads > 1) {
     std::vector<std::vector<const paddle::framework::LoDTensor*>> jobs;
-    bcast_datasets(datasets, &jobs, FLAGS_num_threads);
+    SplitData(datasets, &jobs, FLAGS_num_threads);
     std::vector<std::unique_ptr<std::thread>> threads;
     for (int i = 0; i < FLAGS_num_threads; ++i) {
       threads.emplace_back(new std::thread(ThreadRunInfer, i, &executor, scope,
                                            std::ref(inference_program),
                                            std::ref(jobs)));
     }
-    start_ms = get_current_ms();
+    start_ms = GetCurrentMs();
     for (int i = 0; i < FLAGS_num_threads; ++i) {
       threads[i]->join();
     }
-    stop_ms = get_current_ms();
-
+    stop_ms = GetCurrentMs();
   } else {
     if (FLAGS_prepare_vars) {
       executor.CreateVariables(*inference_program, scope, 0);
     }
-    // always prepare context and burning first time
+    // always prepare context
     std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx;
     ctx = executor.Prepare(*inference_program, 0);
 
@@ -198,14 +194,14 @@ TEST(inference, nlp) {
     PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL);
     std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
 
-    // for data and run
-    start_ms = get_current_ms();
+    // feed data and run
+    start_ms = GetCurrentMs();
     for (size_t i = 0; i < datasets.size(); ++i) {
       feed_targets[feed_target_names[0]] = &(datasets[i]);
       executor.RunPreparedContext(ctx.get(), scope, &feed_targets,
                                   &fetch_targets, !FLAGS_prepare_vars);
     }
-    stop_ms = get_current_ms();
+    stop_ms = GetCurrentMs();
   }
 
   LOG(INFO) << "Total inference time with " << FLAGS_num_threads
diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h
index 1f5551567c6881775c09bd1e52a713261ee87a16..01b8dc0be662da22fe15a79cd9abfe5fa92c9577 100644
--- a/paddle/fluid/inference/tests/test_helper.h
+++ b/paddle/fluid/inference/tests/test_helper.h
@@ -182,9 +182,6 @@ void TestInference(const std::string& dirname,
         "init_program",
         paddle::platform::DeviceContextPool::Instance().Get(place));
     inference_program = InitProgram(&executor, scope, dirname, is_combined);
-    // std::string binary_str;
-    // inference_program->Proto()->SerializeToString(&binary_str);
-    // LOG(INFO) << binary_str;
     if (use_mkldnn) {
       EnableMKLDNN(inference_program);
     }
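
Note (not part of the patch): with this change the NLP inference test no longer reads a hard-coded /home/... ids file; both the model directory and the input ids come from flags, so a typical invocation would look roughly like `./test_inference_nlp --modelpath=path/to/your/model --datafile=path/to/your/data --num_threads=4`. The binary name here is only assumed from the test source file name; adjust it to the actual build target.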
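
For readers skimming the patch, the new SplitData() simply deals the loaded samples out to the per-thread job lists in round-robin order, so with N samples and T threads each thread receives either ceil(N/T) or floor(N/T) pointers, which ThreadRunInfer() then consumes as jobs[tid]. Below is a minimal standalone sketch of that distribution logic, not part of the patch; it substitutes plain int samples for paddle::framework::LoDTensor so it compiles on its own.

// Standalone sketch of the round-robin split used by SplitData() above.
// Assumption: plain ints stand in for paddle::framework::LoDTensor samples.
#include <cstdio>
#include <vector>

void SplitData(const std::vector<int>& datasets,
               std::vector<std::vector<const int*>>* jobs,
               const int num_threads) {
  size_t s = 0;
  jobs->resize(num_threads);
  // Hand out pointers to samples one by one, cycling over the thread slots.
  while (s < datasets.size()) {
    for (auto it = jobs->begin(); it != jobs->end(); it++) {
      it->emplace_back(&datasets[s]);
      s++;
      if (s >= datasets.size()) {
        break;
      }
    }
  }
}

int main() {
  std::vector<int> datasets = {0, 1, 2, 3, 4, 5, 6};  // 7 fake samples
  std::vector<std::vector<const int*>> jobs;
  SplitData(datasets, &jobs, 3);
  for (size_t tid = 0; tid < jobs.size(); ++tid) {
    std::printf("thread %zu gets %zu samples\n", tid, jobs[tid].size());
  }
  return 0;
}

Compiled with any C++11 compiler, this prints a 3/2/2 split for 7 samples over 3 threads, matching how the patched test spreads work before launching one std::thread per job list.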