Commit a53e8a8d authored by Brian Liu, committed by Sylwester Fraczek

Update the MKLDNN integration framework to support multiple Paddle instances

Make all blob info stored in the global device context thread-based.
Meanwhile, save the thread id in thread-local storage in ParallelDo.
Parent 2256fae4
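
For context, here is a minimal, hypothetical usage sketch (not part of this commit): worker threads record their id via set_cur_thread_id() before touching the blob cache, the way ParallelDo does for its workers, so each instance resolves to its own private KeyBlob. Only set_cur_thread_id, SetBlob, and GetBlob come from this diff; the harness, the "conv_pd" key, and the blob contents are invented for illustration, and the build is assumed to have PADDLE_WITH_MKLDNN enabled.

// Hypothetical multi-instance harness around the API introduced in this commit.
#include <memory>
#include <thread>
#include <vector>

#include "paddle/fluid/platform/device_context.h"

namespace plat = paddle::platform;

void Worker(int tid, plat::MKLDNNDeviceContext* ctx) {
  plat::set_cur_thread_id(tid);  // store this worker's id in thread-local storage

  // Both calls now resolve to this thread's own KeyBlob inside the BlobMap.
  ctx->SetBlob("conv_pd", std::make_shared<int>(tid));
  auto blob = ctx->GetBlob("conv_pd");  // never observes another thread's blob
}

int main() {
  plat::MKLDNNDeviceContext ctx(plat::CPUPlace{});
  std::vector<std::thread> workers;
  for (int i = 1; i <= 4; ++i) workers.emplace_back(Worker, i, &ctx);
  for (auto& w : workers) w.join();
  return 0;
}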
......@@ -25,6 +25,14 @@ namespace platform {
 DeviceContextPool* DeviceContextPool::pool = nullptr;
 
+namespace {
+// Current thread's id.
+thread_local int cur_thread_id = 0;
+}
+
+void set_cur_thread_id(int tid) { cur_thread_id = tid; }
+int get_cur_thread_id(void) { return cur_thread_id; }
+
 platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
   auto it = device_contexts_.find(place);
   if (it == device_contexts_.end()) {
......@@ -296,38 +304,65 @@ Place CUDAPinnedDeviceContext::GetPlace() const { return place_; }
 #ifdef PADDLE_WITH_MKLDNN
 MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
-    : CPUDeviceContext(place), engine_(mkldnn::engine::cpu, 0), p_blobs_() {
-  p_blobs_.reset(new std::unordered_map<std::string, std::shared_ptr<void>>());
+    : CPUDeviceContext(place), engine_(mkldnn::engine::cpu, 0), p_blobmap_() {
+  p_blobmap_.reset(new BlobMap());
+  p_mutex_.reset(new std::mutex());
 }
 
 void MKLDNNDeviceContext::SetBlob(const std::string& name,
                                   std::shared_ptr<void> data) const {
-  std::unordered_map<std::string, std::shared_ptr<void>>* p;
-  p = p_blobs_.get();
+  BlobMap* pMap = p_blobmap_.get();
+  std::shared_ptr<KeyBlob> pBlob = nullptr;
+
+  int tid = platform::get_cur_thread_id();
 
-  auto it = p->find(name);
+  std::lock_guard<std::mutex> lock(*p_mutex_.get());
 
-  if (it == p->end()) {
-    (*p)[name] = data;  // create new blob
+  // Find KeyBlob for current thread
+  auto map_it = pMap->find(tid);
+
+  if (map_it == pMap->end()) {
+    // 1st time to set blob in current thread
+    pBlob = std::shared_ptr<KeyBlob>(new KeyBlob());
+    (*pMap)[tid] = pBlob;
   } else {
-    it->second = data;  // set data to existing blob
+    pBlob = map_it->second;
   }
 
+  // Find Key in found (or newly created) KeyBlob
+  auto key_it = pBlob->find(name);
+
+  if (key_it == pBlob->end()) {
+    (*pBlob)[name] = data;  // create new blob
+  } else {
+    key_it->second = data;  // set data to existing blob
+  }
+
+  // lock will be automatically released when out of scope
   return;
 }
 
 std::shared_ptr<void> MKLDNNDeviceContext::GetBlob(
     const std::string& name) const {
-  std::unordered_map<std::string, std::shared_ptr<void>>* p;
-  p = p_blobs_.get();
+  BlobMap* pMap = p_blobmap_.get();
+  std::shared_ptr<KeyBlob> pBlob = nullptr;
 
-  auto it = p->find(name);
+  int tid = platform::get_cur_thread_id();
 
-  if (it != p->end()) {
-    return it->second;
-  }
+  std::lock_guard<std::mutex> lock(*p_mutex_.get());
 
-  return nullptr;
+  // Find KeyBlob for current thread firstly
+  auto map_it = pMap->find(tid);
+  if (map_it == pMap->end()) return nullptr;
+  pBlob = map_it->second;
+
+  // Find Blob via name
+  auto key_it = pBlob->find(name);
+
+  if (key_it == pBlob->end()) return nullptr;
+
+  // lock will be automatically released when out of scope
+  return key_it->second;
 }
 
 #endif
......
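
The same storage-and-locking pattern, condensed into a self-contained sketch in standard C++ only (no Paddle or MKLDNN types). The names mirror the diff, but the program itself is illustrative: a thread_local id selects a per-thread inner map inside an outer map, a single mutex guards the outer structure, and a lookup from a thread that never stored a blob comes back empty.

#include <cassert>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>

using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
using BlobMap = std::unordered_map<int, std::shared_ptr<KeyBlob>>;

thread_local int cur_thread_id = 0;  // each thread sees its own copy

BlobMap blob_map;
std::mutex blob_mutex;

void set_blob(const std::string& name, std::shared_ptr<void> data) {
  std::lock_guard<std::mutex> lock(blob_mutex);  // released at scope exit
  auto& blob = blob_map[cur_thread_id];          // creates KeyBlob on 1st use
  if (!blob) blob = std::make_shared<KeyBlob>();
  (*blob)[name] = std::move(data);
}

std::shared_ptr<void> get_blob(const std::string& name) {
  std::lock_guard<std::mutex> lock(blob_mutex);
  auto it = blob_map.find(cur_thread_id);
  if (it == blob_map.end()) return nullptr;      // this thread stored nothing
  auto key_it = it->second->find(name);
  return key_it == it->second->end() ? nullptr : key_it->second;
}

int main() {
  std::thread t([] {
    cur_thread_id = 1;                           // worker registers its id
    set_blob("pd", std::make_shared<int>(42));
  });
  t.join();
  // The main thread (id 0) never stored "pd", so its lookup misses.
  assert(get_blob("pd") == nullptr);
  return 0;
}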
......@@ -39,6 +39,12 @@ limitations under the License. */
 namespace paddle {
 namespace platform {
 
+using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
+using BlobMap = std::unordered_map<int, std::shared_ptr<KeyBlob>>;
+
+void set_cur_thread_id(int);
+int get_cur_thread_id(void);
+
 class DeviceContext {
  public:
   virtual ~DeviceContext() {}
......@@ -191,8 +197,8 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
  private:
   mkldnn::engine engine_;
-  std::shared_ptr<std::unordered_map<std::string, std::shared_ptr<void>>>
-      p_blobs_;
+  std::shared_ptr<BlobMap> p_blobmap_;
+  std::shared_ptr<std::mutex> p_mutex_;
 };
 
 #endif
......