Commit a4822ed8 authored by T tensor-tang

add thread setting

Parent 53875625
@@ -19,6 +19,10 @@ limitations under the License. */
 #include "gflags/gflags.h"
 #include "gtest/gtest.h"
 #include "paddle/fluid/inference/tests/test_helper.h"
+#ifdef PADDLE_WITH_MKLML
+#include <mkl_service.h>
+#include <omp.h>
+#endif
 
 DEFINE_string(dirname, "", "Directory of the inference model.");
 DEFINE_int32(repeat, 100, "Running the inference program repeat times");
@@ -149,6 +153,14 @@ TEST(inference, nlp) {
     EnableMKLDNN(inference_program);
   }
 
+#ifdef PADDLE_WITH_MKLML
+  // only use 1 core per thread
+  omp_set_dynamic(0);
+  omp_set_num_threads(1);
+  mkl_set_num_threads(1);
+#endif
+
+  double start_ms = 0, stop_ms = 0;
   if (FLAGS_num_threads > 1) {
     std::vector<std::vector<const paddle::framework::LoDTensor*>> jobs;
     bcast_datasets(datasets, &jobs, FLAGS_num_threads);
@@ -158,9 +170,11 @@ TEST(inference, nlp) {
                                        std::ref(inference_program),
                                        std::ref(jobs)));
     }
+    start_ms = get_current_ms();
     for (int i = 0; i < FLAGS_num_threads; ++i) {
       threads[i]->join();
     }
+    stop_ms = get_current_ms();
   } else {
     if (FLAGS_prepare_vars) {
@@ -185,16 +199,18 @@ TEST(inference, nlp) {
     std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
 
     // for data and run
-    auto start_ms = get_current_ms();
+    start_ms = get_current_ms();
     for (size_t i = 0; i < datasets.size(); ++i) {
       feed_targets[feed_target_names[0]] = &(datasets[i]);
       executor.RunPreparedContext(ctx.get(), scope, &feed_targets,
                                   &fetch_targets, !FLAGS_prepare_vars);
     }
-    auto stop_ms = get_current_ms();
-    LOG(INFO) << "Total infer time: " << (stop_ms - start_ms) / 1000.0 / 60
-              << " min, avg time per seq: "
-              << (stop_ms - start_ms) / datasets.size() << " ms";
+    stop_ms = get_current_ms();
   }
+  LOG(INFO) << "Total inference time with " << FLAGS_num_threads
+            << " threads : " << (stop_ms - start_ms) / 1000.0
+            << " sec, avg time per seq: "
+            << (stop_ms - start_ms) / datasets.size() << " ms";
   delete scope;
 }
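
Note (not part of the commit): the MKLML block caps OpenMP and MKL at one thread each, so the FLAGS_num_threads worker threads each stay on one core instead of contending over a shared BLAS thread pool; hoisting start_ms/stop_ms out of the branches lets one LOG line cover both the single- and multi-threaded paths. Below is a minimal standalone sketch of the same pattern, assuming an MKL-enabled build; run_inference, num_threads, and the printf are hypothetical stand-ins for the test's helpers and flags, and omp_get_wtime() stands in for get_current_ms().

// Minimal sketch, not the PaddlePaddle test itself: pin each math library
// to one thread, run N worker threads, and time the whole batch once.
// Assumes an MKL-enabled toolchain (e.g. g++ -fopenmp ... -lmkl_rt).
#include <mkl_service.h>
#include <omp.h>

#include <cstdio>
#include <thread>
#include <vector>

static void run_inference(int worker_id, int num_seqs) {
  // Stand-in for the per-thread inference job over this worker's share
  // of the dataset (bcast_datasets + RunPreparedContext in the test).
  (void)worker_id;
  (void)num_seqs;
}

int main() {
  // The same three calls as the commit: keep every OpenMP/BLAS region
  // single-threaded so N workers occupy N cores, not N * omp_threads.
  omp_set_dynamic(0);      // forbid the runtime from shrinking thread teams
  omp_set_num_threads(1);  // one OpenMP thread per parallel region
  mkl_set_num_threads(1);  // one MKL thread per BLAS call

  const int num_threads = 4;    // hypothetical stand-in for FLAGS_num_threads
  const int seqs_per_job = 100; // hypothetical per-worker workload

  std::vector<std::thread> workers;
  double start_ms = omp_get_wtime() * 1000.0;
  for (int i = 0; i < num_threads; ++i) {
    workers.emplace_back(run_inference, i, seqs_per_job);
  }
  for (auto& t : workers) t.join();
  double stop_ms = omp_get_wtime() * 1000.0;

  // One timing report for any thread count, as in the commit's LOG(INFO).
  std::printf("Total inference time with %d threads : %f sec\n", num_threads,
              (stop_ms - start_ms) / 1000.0);
  return 0;
}

omp_set_dynamic(0) matters here: with dynamic adjustment enabled, the OpenMP runtime may silently grant fewer (or vary) threads than omp_set_num_threads requests, which would make per-thread timings non-reproducible.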