[LITE][BM] multi thread ok, test=develop

4524138a · baolei.an · e414f352 · 4524138a · 4524138a · 4524138a
4 changed files
--- a/lite/api/test_classify_lite_bm.cc
+++ b/lite/api/test_classify_lite_bm.cc
@@ -14,6 +14,7 @@
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
+#include <pthread.h>
 #include <fstream>
 #include <vector>
 #include "lite/api/cxx_api.h"
@@ -30,14 +31,18 @@ DEFINE_string(input_img_txt_path,
 namespace paddle {
 namespace lite {
-void TestModel(const std::vector<Place>& valid_places) {
+const int g_batch_size = 4;
+const int g_thread_num = 10;
+void instance_run() {
  lite::Predictor predictor;
  std::vector<std::string> passes;
+  std::vector<Place> valid_places({Place{TARGET(kBM), PRECISION(kFloat)},
+                                   Place{TARGET(kX86), PRECISION(kFloat)}});
  predictor.Build(FLAGS_model_dir, "", "", valid_places, passes);
  auto* input_tensor = predictor.GetInput(0);
-  input_tensor->Resize(DDim(
+  input_tensor->Resize(DDim(std::vector<DDim::value_type>(
-      std::vector<DDim::value_type>({1, 3, FLAGS_im_height, FLAGS_im_width})));
+      {g_batch_size, 3, FLAGS_im_height, FLAGS_im_width})));
  auto* data = input_tensor->mutable_data<float>();
  auto item_size = input_tensor->dims().production();
  if (FLAGS_input_img_txt_path.empty()) {
@@ -45,12 +50,15 @@ void TestModel(const std::vector<Place>& valid_places) {
      data[i] = 1;
    }
  } else {
-    std::fstream fs(FLAGS_input_img_txt_path, std::ios::in);
+    for (int j = 0; j < g_batch_size; j++) {
-    if (!fs.is_open()) {
+      std::fstream fs(FLAGS_input_img_txt_path, std::ios::in);
-      LOG(FATAL) << "open input_img_txt error.";
+      if (!fs.is_open()) {
-    }
+        LOG(FATAL) << "open input_img_txt error.";
-    for (int i = 0; i < item_size; i++) {
+      }
-      fs >> data[i];
+      // Fill image j's slice of the batch; every slice holds
+      // item_size / g_batch_size floats read from the same text file.
+      for (int i = 0; i < item_size / g_batch_size; i++) {
+        fs >> data[i];
+      }
+      // Step forward by exactly one image. Scaling the step by j (as before)
+      // left the pointer in place after the first pass, so slice 0 was
+      // overwritten and a later slice was never filled.
+      data += item_size / g_batch_size;
    }
  }
  for (int i = 0; i < FLAGS_warmup; ++i) {
@@ -72,6 +80,7 @@ void TestModel(const std::vector<Place>& valid_places) {
  FILE* fp = fopen("result.txt", "wb");
  for (int i = 0; i < out.size(); i++) {
    auto* out_data = out[i]->data<float>();
+    LOG(INFO) << out[i]->numel();
    for (int j = 0; j < out[i]->numel(); j++) {
      fprintf(fp, "%f\n", out_data[j]);
    }
@@ -79,6 +88,16 @@ void TestModel(const std::vector<Place>& valid_places) {
  fclose(fp);
 }
+// Runs g_thread_num copies of instance_run() concurrently and blocks until
+// every one of them has finished.
+//
+// valid_places is kept for interface compatibility but is not consulted:
+// instance_run() takes no arguments and builds its own place list.
+void TestModel(const std::vector<Place>& valid_places) {
+  // Hold the threads by value: no raw `new` whose result could leak if
+  // growing the vector throws, and no extra heap allocation per thread.
+  std::vector<std::thread> instances_vec;
+  instances_vec.reserve(g_thread_num);
+  for (int i = 0; i < g_thread_num; ++i) {
+    instances_vec.emplace_back(&instance_run);
+  }
+  // Join all workers before returning so none outlives the test body.
+  for (auto& instance : instances_vec) {
+    instance.join();
+  }
+}
 TEST(Classify, test_bm) {
  std::vector<Place> valid_places({Place{TARGET(kBM), PRECISION(kFloat)},
                                   Place{TARGET(kX86), PRECISION(kFloat)}});

--- a/lite/kernels/bm/bridges/graph.cc
+++ b/lite/kernels/bm/bridges/graph.cc
@@ -20,11 +20,14 @@ namespace lite {
 namespace subgraph {
 namespace bm {
+pthread_mutex_t Graph::mutex_compiler_ = PTHREAD_MUTEX_INITIALIZER;
 void Graph::AddNode(const std::string& name) {
  nodes_.insert(std::make_pair(name, name));
 }
 void Graph::CreateCompilerHandle() {
+  pthread_mutex_lock(&mutex_compiler_);
 #ifdef BM1682
  compiler_handle_ = create_bmcompiler("BM1682");
 #else
@@ -33,6 +36,8 @@ void Graph::CreateCompilerHandle() {
  CHECK(compiler_handle_ != nullptr);
 }
+void Graph::UnlockCompilerMutex() { pthread_mutex_unlock(&mutex_compiler_); }
 }  // namespace bm
 }  // namespace subgraph
 }  // namespace lite

--- a/lite/kernels/bm/bridges/graph.h
+++ b/lite/kernels/bm/bridges/graph.h
@@ -14,6 +14,7 @@
 #pragma once
+#include <pthread.h>
 #include <memory>
 #include <string>
 #include <unordered_map>
@@ -36,10 +37,12 @@ class Graph {
  }
  void CreateCompilerHandle();
  void* GetCompilerHandle() { return compiler_handle_; }
+  void UnlockCompilerMutex();
 private:
  std::unordered_map<std::string, std::string> nodes_;
  void* compiler_handle_;
+  static pthread_mutex_t mutex_compiler_;
 };
 }  // namespace bm

--- a/lite/kernels/bm/subgraph_compute.cc
+++ b/lite/kernels/bm/subgraph_compute.cc
@@ -40,6 +40,7 @@ int SubgraphEngine::BuildDeviceProgram() {
    op->CheckShape();
    op->InferShape();
    std::string op_type = op->op_info()->Type();
+    LOG(INFO) << op_type;
    if (!bridges.Exists(op_type, TARGET(kBM))) {
      return subgraph::FAILED;
    }
@@ -59,6 +60,7 @@ int SubgraphEngine::BuildDeviceProgram() {
  unsigned int data_size = 0;
  bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
  finish_bmcompiler_data(graph.GetCompilerHandle(), &bmodel_data, &data_size);
+  graph.UnlockCompilerMutex();
  bmrt_hd_ = bmrt_create(bm_hd_);
  if (false == bmrt_load_bmodel_data(bmrt_hd_, bmodel_data, data_size)) {
    return subgraph::FAILED;