Unverified commit a9ea41c5, authored by liutiexing, committed by GitHub

Spinlock (#36030)

* add align for WorkQueue

* add spinlock

* merge spinlock
Parent 79bd5f90
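At its core, this PR swaps RunQueue's std::mutex for paddle::memory::SpinLock. Because SpinLock exposes lock()/unlock() (C++'s BasicLockable interface), std::unique_lock works with it unchanged, which is why the hunks below only touch the template argument. A minimal sketch of that drop-in property, using a hypothetical ToySpinLock in place of the real class:

// Sketch only: ToySpinLock is a stand-in for paddle::memory::SpinLock,
// illustrating that any type with lock()/unlock() satisfies BasicLockable
// and can be guarded by std::unique_lock exactly like std::mutex.
#include <atomic>
#include <mutex>
#include <thread>
#include <vector>

class ToySpinLock {
 public:
  void lock() {
    // Naive test-and-set spin; the real class adds backoff (see below).
    while (flag_.exchange(true, std::memory_order_acquire)) {
    }
  }
  void unlock() { flag_.store(false, std::memory_order_release); }

 private:
  std::atomic<bool> flag_{false};
};

int main() {
  ToySpinLock mu;
  long counter = 0;
  std::vector<std::thread> workers;
  for (int i = 0; i < 4; ++i) {
    workers.emplace_back([&] {
      for (int j = 0; j < 100000; ++j) {
        std::unique_lock<ToySpinLock> lock(mu);  // same pattern as the diff
        ++counter;
      }
    });
  }
  for (auto& t : workers) t.join();
  return counter == 400000 ? 0 : 1;
}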
@@ -37,6 +37,8 @@
 #include <cstdint>
 #include <mutex>
 #include <vector>
+#include "paddle/fluid/framework/new_executor/workqueue_utils.h"
+#include "paddle/fluid/memory/allocation/spin_lock.h"

 namespace paddle {
 namespace framework {
@@ -101,7 +103,7 @@ class RunQueue {
   // PushBack adds w at the end of the queue.
   // If queue is full returns w, otherwise returns default-constructed Work.
   Work PushBack(Work w) {
-    std::unique_lock<std::mutex> lock(mutex_);
+    std::unique_lock<paddle::memory::SpinLock> lock(mutex_);
     unsigned back = back_.load(std::memory_order_relaxed);
     Elem* e = &array_[(back - 1) & kMask];
     uint8_t s = e->state.load(std::memory_order_relaxed);
@@ -123,7 +125,7 @@
       return Work();
     }
-    std::unique_lock<std::mutex> lock(mutex_);
+    std::unique_lock<paddle::memory::SpinLock> lock(mutex_);
     unsigned back = back_.load(std::memory_order_relaxed);
     Elem* e = &array_[back & kMask];
     uint8_t s = e->state.load(std::memory_order_relaxed);
@@ -145,7 +147,7 @@
       return 0;
     }
-    std::unique_lock<std::mutex> lock(mutex_);
+    std::unique_lock<paddle::memory::SpinLock> lock(mutex_);
     unsigned back = back_.load(std::memory_order_relaxed);
     unsigned size = Size();
     unsigned mid = back;
@@ -213,7 +215,7 @@
   // modification counters.
   alignas(64) std::atomic<unsigned> front_;
   alignas(64) std::atomic<unsigned> back_;
-  std::mutex mutex_;
+  paddle::memory::SpinLock mutex_;
   Elem array_[kSize];

   // SizeOrNotEmpty returns current queue size; if NeedSizeEstimate is false,
......
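A note on the alignas(64) members kept as context above: front_ and back_ are written by different threads, and 64-byte alignment (the typical x86 cache-line size, an assumption made explicit here) gives each atomic its own cache line so a store to one does not invalidate readers of the other. A compilable sketch of the effect on layout:

// Sketch, assuming 64-byte cache lines: alignas(64) forces each counter
// onto its own line, eliminating false sharing between producer/consumer.
#include <atomic>
#include <cstdio>

struct Packed {
  std::atomic<unsigned> front_{0};
  std::atomic<unsigned> back_{0};  // usually lands on front_'s cache line
};

struct Padded {
  alignas(64) std::atomic<unsigned> front_{0};
  alignas(64) std::atomic<unsigned> back_{0};  // gets its own line
};

static_assert(sizeof(Padded) >= 128, "each member occupies a full line");

int main() {
  std::printf("packed: %zu bytes, padded: %zu bytes\n", sizeof(Packed),
              sizeof(Padded));
  return 0;
}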
@@ -166,7 +166,7 @@ std::unique_ptr<WorkQueue> CreateMultiThreadedWorkQueue(
                         "WorkQueueOptions.num_threads must be "
                         "greater than 1."));
   std::unique_ptr<WorkQueue> ptr(new WorkQueueImpl(options));
-  return ptr;
+  return std::move(ptr);
 }

 std::unique_ptr<WorkQueueGroup> CreateWorkQueueGroup(
@@ -176,7 +176,7 @@ std::unique_ptr<WorkQueueGroup> CreateWorkQueueGroup(
                         "For a WorkQueueGroup, the number of WorkQueueOptions "
                         "must be greater than 1."));
   std::unique_ptr<WorkQueueGroup> ptr(new WorkQueueGroupImpl(queues_options));
-  return ptr;
+  return std::move(ptr);
 }

 }  // namespace framework
......
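On the two return std::move(ptr) changes: since C++11, a named local whose type matches the return type is moved implicitly by `return ptr;` (and remains eligible for NRVO), so the explicit std::move is presumably there to placate an older toolchain; it forces the same move while disabling NRVO. A small sketch of the equivalence, with WorkQueueLike as a hypothetical stand-in for the real type:

// Sketch: both functions move the unique_ptr out; neither copies.
// WorkQueueLike is a hypothetical stand-in for the real WorkQueue type.
#include <memory>
#include <utility>

struct WorkQueueLike {};

std::unique_ptr<WorkQueueLike> MakeImplicit() {
  std::unique_ptr<WorkQueueLike> ptr(new WorkQueueLike);
  return ptr;  // implicit move since C++11, NRVO still possible
}

std::unique_ptr<WorkQueueLike> MakeExplicit() {
  std::unique_ptr<WorkQueueLike> ptr(new WorkQueueLike);
  return std::move(ptr);  // the diff's form: explicit move, NRVO disabled
}

int main() {
  auto a = MakeImplicit();
  auto b = MakeExplicit();
  return (a && b) ? 0 : 1;
}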
@@ -14,6 +14,7 @@
 #pragma once

 #include <atomic>
 #include <cassert>
+#include <cstddef>
 #include <cstdlib>
......
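The new #include <cstddef> supplies std::size_t for the alignment helpers that the PR's "add align for WorkQueue" commit introduces in workqueue_utils (their declarations and bodies are elided from this view). One common way such helpers are written, shown as a hypothetical sketch rather than the PR's exact code:

// Hypothetical sketch of portable aligned allocation; the PR's real
// helpers are not shown in this diff, so names and bodies are illustrative.
#include <cassert>
#include <cstddef>
#include <cstdlib>
#if defined(_WIN32)
#include <malloc.h>  // _aligned_malloc / _aligned_free
#endif

void* AlignedMallocSketch(std::size_t size, std::size_t alignment) {
  // posix_memalign requires a power-of-two multiple of sizeof(void*).
  assert(alignment >= sizeof(void*) && (alignment & (alignment - 1)) == 0);
#if defined(_WIN32)
  return _aligned_malloc(size, alignment);
#else
  void* ptr = nullptr;
  if (posix_memalign(&ptr, alignment, size) != 0) return nullptr;
  return ptr;
#endif
}

void AlignedFreeSketch(void* ptr) {
#if defined(_WIN32)
  _aligned_free(ptr);  // must pair with _aligned_malloc on Windows
#else
  free(ptr);
#endif
}

int main() {
  void* p = AlignedMallocSketch(256, 64);
  AlignedFreeSketch(p);
  return p ? 0 : 1;
}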
@@ -15,37 +15,48 @@
 #pragma once

 #include <atomic>
-#if !defined(_WIN32)
-#include <sched.h>
-#else
-#include <windows.h>
-#endif  // !_WIN32
+#if defined(_M_X64) || defined(__x86_64__) || defined(_M_IX86) || \
+    defined(__i386__)
+#define __PADDLE_x86__
+#include <immintrin.h>
+#endif
+#include <thread>

 #include "paddle/fluid/platform/macros.h"

 namespace paddle {
 namespace memory {
+static inline void CpuRelax() {
+#if defined(__PADDLE_x86__)
+  _mm_pause();
+#endif
+}

 class SpinLock {
  public:
   SpinLock() : mlock_(false) {}

   void lock() {
-    bool expect = false;
-    uint64_t spin_cnt = 0;
-    while (!mlock_.compare_exchange_weak(expect, true)) {
-      expect = false;
-      if ((++spin_cnt & 0xFF) == 0) {
-#if defined(_WIN32)
-        SleepEx(50, FALSE);
-#else
-        sched_yield();
-#endif
+    for (;;) {
+      if (!mlock_.exchange(true, std::memory_order_acquire)) {
+        break;
+      }
+      constexpr int kMaxLoop = 32;
+      for (int loop = 1; mlock_.load(std::memory_order_relaxed);) {
+        if (loop <= kMaxLoop) {
+          for (int i = 1; i <= loop; ++i) {
+            CpuRelax();
+          }
+          loop *= 2;
+        } else {
+          std::this_thread::yield();
+        }
       }
     }
   }

-  void unlock() { mlock_.store(false); }
+  void unlock() { mlock_.store(false, std::memory_order_release); }

   DISABLE_COPY_AND_ASSIGN(SpinLock);

  private:
......
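Taken together, the new lock() is a test-and-test-and-set loop with exponential backoff: the expensive exchange (acquire ordering, pairing with the release store in unlock()) runs only on entry and after the relaxed load has seen the lock free; contended waiters spin on the cheap load, padding each round with _mm_pause via CpuRelax(), and fall back to std::this_thread::yield() once loop exceeds kMaxLoop. A condensed, self-contained rendering of the same technique, with illustrative names:

// Condensed TTAS lock in the style of the diff above; names are
// illustrative, not Paddle's. The acquire/release pair guarantees that
// writes made before unlock() are visible after a successful lock().
#include <atomic>
#include <cstdio>
#include <thread>

class TtasLock {
 public:
  void lock() {
    while (flag_.exchange(true, std::memory_order_acquire)) {  // TAS
      // Test loop: read-only spinning keeps the cache line shared
      // instead of bouncing it between cores on every attempt.
      while (flag_.load(std::memory_order_relaxed)) {
        std::this_thread::yield();  // the diff pauses first, then yields
      }
    }
  }
  void unlock() { flag_.store(false, std::memory_order_release); }

 private:
  std::atomic<bool> flag_{false};
};

int main() {
  TtasLock mu;
  int shared = 0;
  std::thread t([&] {
    mu.lock();
    ++shared;  // happens-before the release store in unlock()
    mu.unlock();
  });
  t.join();
  mu.lock();
  std::printf("shared = %d\n", shared);
  mu.unlock();
  return 0;
}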