提交 1f676247 编写于 作者: P Peter Zijlstra 提交者: Ingo Molnar

x86/alternatives: Implement a better poke_int3_handler() completion scheme

Commit:

  285a54ef ("x86/alternatives: Sync bp_patching update for avoiding NULL pointer exception")

added an additional text_poke_sync() IPI to text_poke_bp_batch() to
handle the rare case where another CPU is still inside an INT3 handler
while we clear the global state.

Instead of spraying IPIs around, count the active INT3 handlers and
wait for them to go away before proceeding to clear/reuse the data.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
上级 46f5cfc1
...@@ -948,10 +948,29 @@ struct text_poke_loc { ...@@ -948,10 +948,29 @@ struct text_poke_loc {
const u8 text[POKE_MAX_OPCODE_SIZE]; const u8 text[POKE_MAX_OPCODE_SIZE];
}; };
static struct bp_patching_desc { struct bp_patching_desc {
struct text_poke_loc *vec; struct text_poke_loc *vec;
int nr_entries; int nr_entries;
} bp_patching; atomic_t refs;
};
static struct bp_patching_desc *bp_desc;
/*
 * Try to take a reference on the patching descriptor published via @descp.
 *
 * Reads the pointer once and, if it is non-NULL, attempts to bump its
 * refcount unless that count is already zero.
 *
 * Returns the descriptor with an extra reference held (release it with
 * put_desc()), or NULL when no descriptor is published or its refcount
 * could not be raised.
 */
static inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
{
	/* Single snapshot of the published pointer; plays the role of rcu_dereference. */
	struct bp_patching_desc *cur = READ_ONCE(*descp);

	if (cur && atomic_inc_not_zero(&cur->refs))
		return cur;

	return NULL;
}
/*
 * Drop a reference on @desc previously obtained with try_get_desc().
 *
 * The decrement has no return value and is preceded by an explicit
 * barrier; the statement order below must be preserved.
 */
static inline void put_desc(struct bp_patching_desc *desc)
{
/*
 * Order this CPU's earlier accesses to the descriptor before the
 * decrement becomes visible.
 * NOTE(review): presumably pairs with the acquire-wait on refs in
 * text_poke_bp_batch() -- confirm against the full file.
 */
smp_mb__before_atomic();
atomic_dec(&desc->refs);
}
static inline void *text_poke_addr(struct text_poke_loc *tp) static inline void *text_poke_addr(struct text_poke_loc *tp)
{ {
...@@ -972,26 +991,26 @@ NOKPROBE_SYMBOL(patch_cmp); ...@@ -972,26 +991,26 @@ NOKPROBE_SYMBOL(patch_cmp);
int notrace poke_int3_handler(struct pt_regs *regs) int notrace poke_int3_handler(struct pt_regs *regs)
{ {
struct bp_patching_desc *desc;
struct text_poke_loc *tp; struct text_poke_loc *tp;
int len, ret = 0;
void *ip; void *ip;
int len;
if (user_mode(regs))
return 0;
/* /*
* Having observed our INT3 instruction, we now must observe * Having observed our INT3 instruction, we now must observe
* bp_patching.nr_entries. * bp_desc:
* *
* nr_entries != 0 INT3 * bp_desc = desc INT3
* WMB RMB * WMB RMB
* write INT3 if (nr_entries) * write INT3 if (desc)
*
* Idem for other elements in bp_patching.
*/ */
smp_rmb(); smp_rmb();
if (likely(!bp_patching.nr_entries)) desc = try_get_desc(&bp_desc);
return 0; if (!desc)
if (user_mode(regs))
return 0; return 0;
/* /*
...@@ -1002,16 +1021,16 @@ int notrace poke_int3_handler(struct pt_regs *regs) ...@@ -1002,16 +1021,16 @@ int notrace poke_int3_handler(struct pt_regs *regs)
/* /*
* Skip the binary search if there is a single member in the vector. * Skip the binary search if there is a single member in the vector.
*/ */
if (unlikely(bp_patching.nr_entries > 1)) { if (unlikely(desc->nr_entries > 1)) {
tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries, tp = bsearch(ip, desc->vec, desc->nr_entries,
sizeof(struct text_poke_loc), sizeof(struct text_poke_loc),
patch_cmp); patch_cmp);
if (!tp) if (!tp)
return 0; goto out_put;
} else { } else {
tp = bp_patching.vec; tp = desc->vec;
if (text_poke_addr(tp) != ip) if (text_poke_addr(tp) != ip)
return 0; goto out_put;
} }
len = text_opcode_size(tp->opcode); len = text_opcode_size(tp->opcode);
...@@ -1023,7 +1042,7 @@ int notrace poke_int3_handler(struct pt_regs *regs) ...@@ -1023,7 +1042,7 @@ int notrace poke_int3_handler(struct pt_regs *regs)
* Someone poked an explicit INT3, they'll want to handle it, * Someone poked an explicit INT3, they'll want to handle it,
* do not consume. * do not consume.
*/ */
return 0; goto out_put;
case CALL_INSN_OPCODE: case CALL_INSN_OPCODE:
int3_emulate_call(regs, (long)ip + tp->rel32); int3_emulate_call(regs, (long)ip + tp->rel32);
...@@ -1038,7 +1057,11 @@ int notrace poke_int3_handler(struct pt_regs *regs) ...@@ -1038,7 +1057,11 @@ int notrace poke_int3_handler(struct pt_regs *regs)
BUG(); BUG();
} }
return 1; ret = 1;
out_put:
put_desc(desc);
return ret;
} }
NOKPROBE_SYMBOL(poke_int3_handler); NOKPROBE_SYMBOL(poke_int3_handler);
...@@ -1069,14 +1092,18 @@ static int tp_vec_nr; ...@@ -1069,14 +1092,18 @@ static int tp_vec_nr;
*/ */
static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{ {
struct bp_patching_desc desc = {
.vec = tp,
.nr_entries = nr_entries,
.refs = ATOMIC_INIT(1),
};
unsigned char int3 = INT3_INSN_OPCODE; unsigned char int3 = INT3_INSN_OPCODE;
unsigned int i; unsigned int i;
int do_sync; int do_sync;
lockdep_assert_held(&text_mutex); lockdep_assert_held(&text_mutex);
bp_patching.vec = tp; smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */
bp_patching.nr_entries = nr_entries;
/* /*
* Corresponding read barrier in int3 notifier for making sure the * Corresponding read barrier in int3 notifier for making sure the
...@@ -1131,17 +1158,12 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries ...@@ -1131,17 +1158,12 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
text_poke_sync(); text_poke_sync();
/* /*
* sync_core() implies an smp_mb() and orders this store against * Remove and synchronize_rcu(), except we have a very primitive
* the writing of the new instruction. * refcount based completion.
*/ */
bp_patching.nr_entries = 0; WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */
/* if (!atomic_dec_and_test(&desc.refs))
* This sync_core () call ensures that all INT3 handlers in progress atomic_cond_read_acquire(&desc.refs, !VAL);
* have finished. This allows poke_int3_handler() after this to
* avoid touching bp_paching.vec by checking nr_entries == 0.
*/
text_poke_sync();
bp_patching.vec = NULL;
} }
void text_poke_loc_init(struct text_poke_loc *tp, void *addr, void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册