Unverified · Commit a9ea41c5 authored by liutiexing, committed by GitHub

Spinlock (#36030)

* add align for WorkQueue

* add spinlock

* merge spinlock
Parent: 79bd5f90
paddle/fluid/framework/new_executor/run_queue.h
@@ -37,6 +37,8 @@
 #include <cstdint>
 #include <mutex>
 #include <vector>
+#include "paddle/fluid/framework/new_executor/workqueue_utils.h"
+#include "paddle/fluid/memory/allocation/spin_lock.h"
 
 namespace paddle {
 namespace framework {
@@ -101,7 +103,7 @@ class RunQueue {
   // PushBack adds w at the end of the queue.
   // If queue is full returns w, otherwise returns default-constructed Work.
   Work PushBack(Work w) {
-    std::unique_lock<std::mutex> lock(mutex_);
+    std::unique_lock<paddle::memory::SpinLock> lock(mutex_);
     unsigned back = back_.load(std::memory_order_relaxed);
     Elem* e = &array_[(back - 1) & kMask];
     uint8_t s = e->state.load(std::memory_order_relaxed);
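
Editor's note: the swap from std::mutex to paddle::memory::SpinLock is a drop-in change because std::unique_lock only requires its lock type to satisfy BasicLockable, i.e. to expose lock() and unlock(). A minimal sketch with a hypothetical lock type:

```cpp
#include <mutex>

// Any type with lock()/unlock() is BasicLockable, so the standard
// lock guards work with it exactly as they do with std::mutex.
struct ToyLock {
  void lock() {}    // stub; a real lock would block here
  void unlock() {}
};

void CriticalSection() {
  static ToyLock mu;
  std::unique_lock<ToyLock> guard(mu);  // same code shape as with std::mutex
  // ... protected work ...
}
```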
@@ -123,7 +125,7 @@ class RunQueue {
     if (Empty()) {
       return Work();
     }
-    std::unique_lock<std::mutex> lock(mutex_);
+    std::unique_lock<paddle::memory::SpinLock> lock(mutex_);
     unsigned back = back_.load(std::memory_order_relaxed);
     Elem* e = &array_[back & kMask];
     uint8_t s = e->state.load(std::memory_order_relaxed);
@@ -145,7 +147,7 @@ class RunQueue {
     if (Empty()) {
       return 0;
     }
-    std::unique_lock<std::mutex> lock(mutex_);
+    std::unique_lock<paddle::memory::SpinLock> lock(mutex_);
     unsigned back = back_.load(std::memory_order_relaxed);
     unsigned size = Size();
     unsigned mid = back;
@@ -213,7 +215,7 @@ class RunQueue {
   // modification counters.
   alignas(64) std::atomic<unsigned> front_;
   alignas(64) std::atomic<unsigned> back_;
-  std::mutex mutex_;
+  paddle::memory::SpinLock mutex_;
   Elem array_[kSize];
 
   // SizeOrNotEmpty returns current queue size; if NeedSizeEstimate is false,
......
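
Editor's note: the alignas(64) on front_ and back_ (the "add align for WorkQueue" change in this PR) keeps the producer and consumer counters on separate cache lines, so a write to one does not invalidate the other's cached copy (false sharing). A standalone sketch of the idiom; the struct and member names are illustrative, not Paddle's:

```cpp
#include <atomic>

// Counters updated by different threads. Without the alignment both could
// land on one 64-byte cache line, and every producer write would evict the
// consumer's cached copy of front (false sharing).
struct QueueIndices {
  alignas(64) std::atomic<unsigned> front{0};  // consumer side
  alignas(64) std::atomic<unsigned> back{0};   // producer side
};

static_assert(sizeof(QueueIndices) >= 128,
              "each counter occupies its own cache line");
```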
paddle/fluid/framework/new_executor/workqueue.cc
@@ -166,7 +166,7 @@ std::unique_ptr<WorkQueue> CreateMultiThreadedWorkQueue(
                         "WorkQueueOptions.num_threads must be "
                         "greater than 1."));
   std::unique_ptr<WorkQueue> ptr(new WorkQueueImpl(options));
-  return ptr;
+  return std::move(ptr);
 }
 
 std::unique_ptr<WorkQueueGroup> CreateWorkQueueGroup(
@@ -176,7 +176,7 @@ std::unique_ptr<WorkQueueGroup> CreateWorkQueueGroup(
                         "For a WorkQueueGroup, the number of WorkQueueOptions "
                         "must be greater than 1."));
   std::unique_ptr<WorkQueueGroup> ptr(new WorkQueueGroupImpl(queues_options));
-  return ptr;
+  return std::move(ptr);
 }
 
 }  // namespace framework
......
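
Editor's note on `return std::move(ptr);`: when a local variable has exactly the function's return type, as it does here, `return ptr;` already performs an implicit move, and the explicit std::move can suppress copy elision on some compilers. The explicit form is mainly needed on older toolchains when the local's type differs from the return type (e.g. a unique_ptr<Derived> returned as unique_ptr<Base>). A small sketch with hypothetical stand-in types:

```cpp
#include <memory>

struct WorkQueueBase { virtual ~WorkQueueBase() = default; };
struct WorkQueueImplStub : WorkQueueBase {};  // hypothetical stand-in

std::unique_ptr<WorkQueueBase> Create() {
  std::unique_ptr<WorkQueueBase> ptr(new WorkQueueImplStub);
  return ptr;  // implicit move: same type as the return type
}

std::unique_ptr<WorkQueueBase> CreateDerived() {
  std::unique_ptr<WorkQueueImplStub> ptr(new WorkQueueImplStub);
  return std::move(ptr);  // differing type: older compilers require the move
}
```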
paddle/fluid/framework/new_executor/workqueue_utils.h
@@ -14,6 +14,7 @@
 #pragma once
 
+#include <atomic>
 #include <cassert>
 #include <cstddef>
 #include <cstdlib>
......
paddle/fluid/memory/allocation/spin_lock.h
@@ -15,37 +15,48 @@
 #pragma once
 
 #include <atomic>
-#if !defined(_WIN32)
-#include <sched.h>
-#else
-#include <windows.h>
-#endif  // !_WIN32
+#if defined(_M_X64) || defined(__x86_64__) || defined(_M_IX86) || \
+    defined(__i386__)
+#define __PADDLE_x86__
+#include <immintrin.h>
+#endif
+#include <thread>
 
 #include "paddle/fluid/platform/macros.h"
 
 namespace paddle {
 namespace memory {
+static inline void CpuRelax() {
+#if defined(__PADDLE_x86__)
+  _mm_pause();
+#endif
+}
 
 class SpinLock {
  public:
   SpinLock() : mlock_(false) {}
 
   void lock() {
-    bool expect = false;
-    uint64_t spin_cnt = 0;
-    while (!mlock_.compare_exchange_weak(expect, true)) {
-      expect = false;
-      if ((++spin_cnt & 0xFF) == 0) {
-#if defined(_WIN32)
-        SleepEx(50, FALSE);
-#else
-        sched_yield();
-#endif
+    for (;;) {
+      if (!mlock_.exchange(true, std::memory_order_acquire)) {
+        break;
+      }
+      constexpr int kMaxLoop = 32;
+      for (int loop = 1; mlock_.load(std::memory_order_relaxed);) {
+        if (loop <= kMaxLoop) {
+          for (int i = 1; i <= loop; ++i) {
+            CpuRelax();
+          }
+          loop *= 2;
+        } else {
+          std::this_thread::yield();
+        }
       }
     }
   }
 
-  void unlock() { mlock_.store(false); }
+  void unlock() { mlock_.store(false, std::memory_order_release); }
   DISABLE_COPY_AND_ASSIGN(SpinLock);
 
  private:
......
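
Editor's note: the rewritten lock() is a test-and-test-and-set loop with bounded exponential backoff. The uncontended path is a single exchange with acquire ordering; under contention the thread spins on a relaxed load (which reads the locally cached line instead of generating atomic write traffic), issuing PAUSE via CpuRelax() and doubling the pause count up to kMaxLoop = 32, after which it yields to the OS scheduler. A self-contained re-creation for experimentation; the __builtin_ia32_pause spelling and the scaffolding in main() are our choices, not Paddle's code:

```cpp
#include <atomic>
#include <iostream>
#include <mutex>   // std::lock_guard
#include <thread>
#include <vector>

// Standalone copy of the same test-and-test-and-set idea; the real class
// lives in paddle/fluid/memory/allocation/spin_lock.h.
class SpinLock {
 public:
  void lock() {
    for (;;) {
      // Fast path: one atomic read-modify-write when uncontended.
      if (!locked_.exchange(true, std::memory_order_acquire)) return;
      // Slow path: spin on a plain load so the cache line stays shared,
      // with exponential backoff, then fall back to the scheduler.
      int loop = 1;
      while (locked_.load(std::memory_order_relaxed)) {
        if (loop <= 32) {
          for (int i = 0; i < loop; ++i) {
#if defined(__x86_64__) || defined(__i386__)
            __builtin_ia32_pause();  // GCC/Clang spelling of the PAUSE hint
#endif
          }
          loop *= 2;
        } else {
          std::this_thread::yield();
        }
      }
    }
  }
  void unlock() { locked_.store(false, std::memory_order_release); }

 private:
  std::atomic<bool> locked_{false};
};

int main() {
  SpinLock lock;
  long counter = 0;
  std::vector<std::thread> workers;
  for (int t = 0; t < 4; ++t) {
    workers.emplace_back([&] {
      for (int i = 0; i < 100000; ++i) {
        std::lock_guard<SpinLock> guard(lock);  // SpinLock is BasicLockable
        ++counter;
      }
    });
  }
  for (auto& w : workers) w.join();
  std::cout << counter << '\n';  // prints 400000 if the lock is correct
  return 0;
}
```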