/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _GEN_CNA_LOCK_SLOWPATH
#error "do not include this file"
#endif

#include <linux/topology.h>
#include <linux/random.h>

/*
 * Implement a NUMA-aware version of MCS (aka CNA, or compact NUMA-aware lock).
 *
 * In CNA, spinning threads are organized in two queues, a main queue for
 * threads running on the same node as the current lock holder, and a
 * secondary queue for threads running on other nodes. At the unlock time,
 * the lock holder scans the main queue looking for a thread running on
 * the same node. If found (call it thread T), all threads in the main queue
 * between the current lock holder and T are moved to the end of the
 * secondary queue, and the lock is passed to T. If such T is not found, the
 * lock is passed to the first node in the secondary queue. Finally, if the
 * secondary queue is empty, the lock is passed to the next thread in the
 * main queue. To avoid starvation of threads in the secondary queue,
 * those threads are moved back to the head of the main queue
 * after a certain expected number of intra-node lock hand-offs.
 *
 * For more details, see https://arxiv.org/abs/1810.05600.
 *
 * Authors: Alex Kogan
 *          Dave Dice
 */

struct cna_node {
	struct mcs_spinlock	mcs;
	u32			numa_node;
	u32			encoded_tail;
	struct cna_node		*tail;	/* points to the secondary queue tail */
};

#define CNA_NODE(ptr) ((struct cna_node *)(ptr))

/* Per-CPU pseudo-random number seed */
static DEFINE_PER_CPU(u32, seed);

/*
 * Controls the probability for intra-node lock hand-off. It can be
 * tuned and may depend, e.g., on the number of CPUs per node. For now,
 * choose a value that provides reasonable long-term fairness without
 * sacrificing performance compared to a version that does not have any
 * fairness guarantees.
 */
#define INTRA_NODE_HANDOFF_PROB_ARG 0x10000

/*
 * Return false with probability 1 / @range.
 * @range must be a power of 2.
 */
static bool probably(unsigned int range)
{
	u32 s;

	s = this_cpu_read(seed);
	s = next_pseudo_random32(s);
	this_cpu_write(seed, s);

	return s & (range - 1);
}

static void cna_init_node(struct mcs_spinlock *node)
{
	struct cna_node *cn = CNA_NODE(node);
	struct mcs_spinlock *base_node;
	int cpuid;

	BUILD_BUG_ON(sizeof(struct cna_node) > sizeof(struct qnode));
	/* we store a pointer in the node's @locked field */
	BUILD_BUG_ON(sizeof(uintptr_t) >
		     sizeof_field(struct mcs_spinlock, locked));

	cpuid = smp_processor_id();
	cn->numa_node = cpu_to_node(cpuid);

	base_node = this_cpu_ptr(&qnodes[0].mcs);
	cn->encoded_tail = encode_tail(cpuid, base_node->count - 1);
}

/**
 * find_successor - Scan the main waiting queue looking for the first
 * thread running on the same node as the lock holder. If found (call it
 * thread T), move all threads in the main queue between the lock holder
 * and T to the end of the secondary queue and return T; otherwise, return NULL.
 */
static struct cna_node *find_successor(struct mcs_spinlock *me)
{
	struct cna_node *me_cna = CNA_NODE(me);
	struct cna_node *head_other, *tail_other, *cur;
	struct cna_node *next = CNA_NODE(READ_ONCE(me->next));
	int my_node;

	/* @next should be set, else we would not be calling this function. */
	WARN_ON_ONCE(next == NULL);

	my_node = me_cna->numa_node;

	/*
	 * Fast path - check whether the immediate successor runs on
	 * the same node.
	 */
	if (next->numa_node == my_node)
		return next;

	head_other = next;
	tail_other = next;

	/*
	 * Traverse the main waiting queue starting from the successor of my
	 * successor, and look for a thread running on the same node.
	 */
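	/*
	 * Illustrative example (thread names A, B, T are hypothetical): if
	 * the main queue looks like
	 *
	 *   me -> A (other node) -> B (other node) -> T (my node) -> ...
	 *
	 * the loop below returns T, and the chain A..B is appended to the
	 * secondary queue, whose head is stored in me->locked and whose
	 * tail is cached in that head's @tail field.
	 */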
	cur = CNA_NODE(READ_ONCE(next->mcs.next));
	while (cur) {
		if (cur->numa_node == my_node) {
			/*
			 * Found a thread on the same node. Move threads
			 * between me and that node into the secondary queue.
			 */
			if (me->locked > 1)
				CNA_NODE(me->locked)->tail->mcs.next =
					(struct mcs_spinlock *)head_other;
			else
				me->locked = (uintptr_t)head_other;

			tail_other->mcs.next = NULL;

			CNA_NODE(me->locked)->tail = tail_other;
			return cur;
		}
		tail_other = cur;
		cur = CNA_NODE(READ_ONCE(cur->mcs.next));
	}

	return NULL;
}

static inline bool cna_set_locked_empty_mcs(struct qspinlock *lock, u32 val,
					    struct mcs_spinlock *node)
{
	/* Check whether the secondary queue is empty. */
	if (node->locked <= 1) {
		if (atomic_try_cmpxchg_relaxed(&lock->val, &val,
					       _Q_LOCKED_VAL))
			return true; /* No contention */
	} else {
		/*
		 * Pass the lock to the first thread in the secondary
		 * queue, but first try to update the queue's tail to
		 * point to the last node in the secondary queue.
		 */
		struct cna_node *succ = CNA_NODE(node->locked);
		u32 new = succ->tail->encoded_tail + _Q_LOCKED_VAL;

		if (atomic_try_cmpxchg_relaxed(&lock->val, &val, new)) {
			arch_mcs_spin_unlock_contended(&succ->mcs.locked, 1);
			return true;
		}
	}

	return false;
}

static inline void cna_pass_mcs_lock(struct mcs_spinlock *node,
				     struct mcs_spinlock *next)
{
	struct cna_node *succ = NULL;
	u64 *var = &next->locked;
	u64 val = 1;

	/*
	 * Try to pass the lock to a thread running on the same node.
	 * For long-term fairness, search for such a thread with high
	 * probability rather than always.
	 */
	if (probably(INTRA_NODE_HANDOFF_PROB_ARG))
		succ = find_successor(node);

	if (succ) {
		var = &succ->mcs.locked;
		/*
		 * We unlock a successor by passing a non-zero value,
		 * so set @val to 1 iff @locked is 0, which will happen
		 * if we acquired the MCS lock when its queue was empty.
		 */
		val = node->locked + (node->locked == 0);
	} else if (node->locked > 1) { /* if the secondary queue is not empty */
		/* pass the lock to the first node in that queue */
		succ = CNA_NODE(node->locked);
		succ->tail->mcs.next = next;
		var = &succ->mcs.locked;
	}
	/*
	 * Otherwise, pass the lock to the immediate successor
	 * in the main queue.
	 */
	arch_mcs_spin_unlock_contended(var, val);
}
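
/*
 * Illustrative sketch, not part of this file: one plausible shape of the
 * unlock-time portion of an MCS-style slowpath that uses the hooks above.
 * The function name and its parameters (@tail is the caller's own encoded
 * tail, @val the last observed lock value) are hypothetical, and details
 * of the real qspinlock slowpath (pending-bit handling, setting the locked
 * byte before the hand-off, etc.) are omitted.
 */
#if 0
static void cna_example_unlock(struct qspinlock *lock,
			       struct mcs_spinlock *node, u32 tail, u32 val)
{
	struct mcs_spinlock *next;

	/*
	 * If the lock word still carries our own tail encoding, nobody is
	 * visibly queued behind us: cna_set_locked_empty_mcs() either takes
	 * the lock outright (empty secondary queue) or hands it to the head
	 * of the secondary queue.
	 */
	if ((val & _Q_TAIL_MASK) == tail &&
	    cna_set_locked_empty_mcs(lock, val, node))
		return;

	/* Otherwise wait until the successor publishes itself ... */
	next = smp_cond_load_relaxed(&node->next, (VAL));

	/* ... and pass the MCS lock on, preferring a same-node waiter. */
	cna_pass_mcs_lock(node, next);
}
#endif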