Unverified commit 1f74be50, authored by openeuler-ci-bot, committed by Gitee

!419 Backport CVEs and bugfixes

Merge Pull Request from: @zhangjialin11 
 
Pull new CVEs:
CVE-2023-26545
CVE-2023-0045
CVE-2023-20938
CVE-2023-0240

rcu bugfix from Zheng Yejian
net bugfixes from Zhengchao Shao
block bugfix from Zhong Jinghua
md/raid10 bugfixes from Li Nan
arm/kasan bugfix from Longlong Xia 
 
Link: https://gitee.com/openeuler/kernel/pulls/419

Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com> 
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> 
@@ -1129,7 +1129,7 @@ export MODORDER := $(extmod-prefix)modules.order
 export MODULES_NSDEPS := $(extmod-prefix)modules.nsdeps

 ifeq ($(KBUILD_EXTMOD),)
-core-y		+= kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
+core-y		+= kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ io_uring/

 vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, \
 		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
......
@@ -137,6 +137,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
  * thread information flags:
  *  TIF_USEDFPU		- FPU was used by this task this quantum (SMP)
  *  TIF_POLLING_NRFLAG	- true if poll_idle() is polling TIF_NEED_RESCHED
+ *
+ * Any bit in the range of 0..15 will cause do_work_pending() to be invoked.
  */
 #define TIF_SIGPENDING		0	/* signal pending */
 #define TIF_NEED_RESCHED	1	/* rescheduling necessary */
@@ -147,6 +149,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
 #define TIF_SYSCALL_TRACEPOINT	6	/* syscall tracepoint instrumentation */
 #define TIF_SECCOMP		7	/* seccomp syscall filtering active */
 #define TIF_PATCH_PENDING	8	/* pending live patching update */
+#define TIF_NOTIFY_SIGNAL	9	/* signal notifications exist */

 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
@@ -162,6 +165,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
 #define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
+#define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)

 /* Checks for any syscall work in entry-common.S */
 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
@@ -171,7 +175,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
  * Change these and you break ASM code in entry-common.S
  */
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
-				 _TIF_NOTIFY_RESUME | _TIF_UPROBE)
+				 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+				 _TIF_NOTIFY_SIGNAL)

 #endif	/* __KERNEL__ */
 #endif	/* __ASM_ARM_THREAD_INFO_H */
......
@@ -54,7 +54,7 @@ __ret_fast_syscall:
 	cmp	r2, r1
 	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
-	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+	movs	r1, r1, lsl #16
 	bne	fast_work_pending
@@ -92,7 +92,7 @@ __ret_fast_syscall:
 	cmp	r2, r1
 	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
-	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+	movs	r1, r1, lsl #16
 	beq	no_work_pending
 UNWIND(.fnend		)
 ENDPROC(ret_fast_syscall)
@@ -134,7 +134,7 @@ ENTRY(ret_to_user_from_irq)
 	cmp	r2, r1
 	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]
-	tst	r1, #_TIF_WORK_MASK
+	movs	r1, r1, lsl #16
 	bne	slow_work_pending
 no_work_pending:
 	asm_trace_hardirqs_on save = 0
......
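The "tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK" to "movs r1, r1, lsl #16" replacement works because, after this series, every flag that must trigger do_work_pending() on ARM sits in bits 0..15 (see the new comment in thread_info.h above). Shifting the flags left by 16 discards the high bits and updates the Z flag, so a single instruction tests all work-causing flags. A rough C equivalent of the new test (illustrative helper name, not from the patch):

	/* non-zero iff any work-causing TIF flag (bits 0..15) is set */
	static inline int arm_work_pending(unsigned long ti_flags)
	{
		return ((unsigned int)ti_flags << 16) != 0;
	}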
@@ -59,7 +59,7 @@ __irq_entry:

 	get_thread_info tsk
 	ldr	r2, [tsk, #TI_FLAGS]
-	tst	r2, #_TIF_WORK_MASK
+	movs	r2, r2, lsl #16
 	beq	2f			@ no work pending
 	mov	r0, #V7M_SCB_ICSR_PENDSVSET
 	str	r0, [r1, V7M_SCB_ICSR]	@ raise PendSV
......
@@ -655,7 +655,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 		if (unlikely(!user_mode(regs)))
 			return 0;
 		local_irq_enable();
-		if (thread_flags & _TIF_SIGPENDING) {
+		if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
 			int restart = do_signal(regs, syscall);
 			if (unlikely(restart)) {
 				/*
......
@@ -264,12 +264,17 @@ void __init kasan_init(void)

 	/*
 	 * 1. The module global variables are in MODULES_VADDR ~ MODULES_END,
-	 *    so we need to map this area.
+	 *    so we need to map this area if CONFIG_KASAN_VMALLOC=n. With
+	 *    VMALLOC support KASAN will manage this region dynamically,
+	 *    refer to kasan_populate_vmalloc() and ARM's implementation of
+	 *    module_alloc().
 	 * 2. PKMAP_BASE ~ PKMAP_BASE+PMD_SIZE's shadow and MODULES_VADDR
 	 *    ~ MODULES_END's shadow is in the same PMD_SIZE, so we can't
 	 *    use kasan_populate_zero_shadow.
 	 */
-	create_mapping((void *)MODULES_VADDR, (void *)(PKMAP_BASE + PMD_SIZE));
+	if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && IS_ENABLED(CONFIG_MODULES))
+		create_mapping((void *)MODULES_VADDR, (void *)(MODULES_END));
+	create_mapping((void *)PKMAP_BASE, (void *)(PKMAP_BASE + PMD_SIZE));

 	/*
 	 * KAsan may reuse the contents of kasan_early_shadow_pte directly, so
......
@@ -69,6 +69,7 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
 #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
 #define TIF_MTE_ASYNC_FAULT	5	/* MTE Asynchronous Tag Check Fault */
+#define TIF_NOTIFY_SIGNAL	6	/* signal notifications exist */
 #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
 #define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
@@ -101,13 +102,15 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define _TIF_32BIT		(1 << TIF_32BIT)
 #define _TIF_SVE		(1 << TIF_SVE)
 #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
+#define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_32BIT_AARCH64	(1 << TIF_32BIT_AARCH64)
 #define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)

 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
-				 _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT)
+				 _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
+				 _TIF_NOTIFY_SIGNAL)

 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
......
@@ -711,7 +711,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 					       (void __user *)NULL, current);
 		}

-		if (thread_flags & _TIF_SIGPENDING)
+		if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
 			do_signal(regs);

 		if (thread_flags & _TIF_NOTIFY_RESUME) {
......
@@ -99,6 +99,7 @@ void arch_setup_new_exec(void);
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+#define TIF_NOTIFY_SIGNAL	3	/* signal notifications exist */
 #define TIF_SYSCALL_EMU		4	/* syscall emulation active */
 #define TIF_RESTORE_TM		5	/* need to restore TM FP/VEC/VSX */
 #define TIF_PATCH_PENDING	6	/* pending live patching update */
@@ -124,6 +125,7 @@ void arch_setup_new_exec(void);
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
+#define _TIF_NOTIFY_SIGNAL	(1<<TIF_NOTIFY_SIGNAL)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_32BIT		(1<<TIF_32BIT)
 #define _TIF_RESTORE_TM		(1<<TIF_RESTORE_TM)
@@ -145,7 +147,8 @@ void arch_setup_new_exec(void);
 #define _TIF_USER_WORK_MASK	(_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
 				 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
-				 _TIF_RESTORE_TM | _TIF_PATCH_PENDING)
+				 _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
+				 _TIF_NOTIFY_SIGNAL)
 #define _TIF_PERSYSCALL_MASK	(_TIF_RESTOREALL|_TIF_NOERROR)

 /* Bits in local_flags */
......
@@ -318,7 +318,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 	if (thread_info_flags & _TIF_PATCH_PENDING)
 		klp_update_patch_state(current);

-	if (thread_info_flags & _TIF_SIGPENDING) {
+	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
 		BUG_ON(regs != current->thread.regs);
 		do_signal(current);
 	}
......
@@ -80,6 +80,7 @@ struct thread_info {
 #define TIF_SYSCALL_TRACEPOINT	6	/* syscall tracepoint instrumentation */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing */
 #define TIF_SECCOMP		8	/* syscall secure computing */
+#define TIF_NOTIFY_SIGNAL	9	/* signal notifications exist */

 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -88,9 +89,11 @@ struct thread_info {
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)

 #define _TIF_WORK_MASK \
-	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED)
+	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+	 _TIF_NOTIFY_SIGNAL)

 #define _TIF_SYSCALL_WORK \
 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \
......
@@ -310,7 +310,7 @@ asmlinkage __visible void do_notify_resume(struct pt_regs *regs,
 					   unsigned long thread_info_flags)
 {
 	/* Handle pending signal delivery */
-	if (thread_info_flags & _TIF_SIGPENDING)
+	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
 		do_signal(regs);

 	if (thread_info_flags & _TIF_NOTIFY_RESUME)
......
@@ -99,6 +99,7 @@ struct thread_info {
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
 #define TIF_SLD			18	/* Restore split lock detection on context switch */
+#define TIF_NOTIFY_SIGNAL	19	/* signal notifications exist */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
@@ -127,6 +128,7 @@ struct thread_info {
 #define _TIF_NOCPUID		(1 << TIF_NOCPUID)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
+#define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_SLD		(1 << TIF_SLD)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
......
@@ -1889,6 +1889,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
 		if (ctrl == PR_SPEC_FORCE_DISABLE)
 			task_set_spec_ib_force_disable(task);
 		task_update_spec_tif(task);
+		if (task == current)
+			indirect_branch_prediction_barrier();
 		break;
 	default:
 		return -ERANGE;
......
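These two added lines are the CVE-2023-0045 fix: previously ib_prctl_set() only updated the TIF flag, so the IBPB was deferred to the next context switch and a task that asked for protection kept running unprotected for a window. With the change, the barrier fires immediately when a task disables indirect-branch speculation for itself. A hedged userspace sketch of the affected interface (standard prctl constants; error handling omitted):

	#include <sys/prctl.h>
	#include <linux/prctl.h>

	/* After the fix, this issues an IBPB right away for the caller
	 * instead of waiting for the next context switch. */
	static int disable_ib_speculation(void)
	{
		return prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
			     PR_SPEC_DISABLE, 0, 0);
	}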
@@ -798,11 +798,11 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
  * mistake.
  */
-void arch_do_signal(struct pt_regs *regs)
+void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
 {
 	struct ksignal ksig;

-	if (get_signal(&ksig)) {
+	if (has_signal && get_signal(&ksig)) {
 		/* Whee! Actually deliver the signal. */
 		handle_signal(&ksig, regs);
 		return;
......
This diff has been collapsed.
@@ -1771,7 +1771,6 @@ static int nbd_dev_add(int index)
 	struct gendisk *disk;
 	struct request_queue *q;
 	int err = -ENOMEM;
-	int first_minor = index << part_shift;

 	nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
 	if (!nbd)
@@ -1835,7 +1834,7 @@ static int nbd_dev_add(int index)
 	refcount_set(&nbd->refs, 1);
 	INIT_LIST_HEAD(&nbd->list);
 	disk->major = NBD_MAJOR;
-	disk->first_minor = first_minor;
+	disk->first_minor = index << part_shift;
 	disk->fops = &nbd_fops;
 	disk->private_data = nbd;
 	sprintf(disk->disk_name, "nbd%d", index);
......
@@ -1365,6 +1365,9 @@ __acquires(bitmap->lock)
 	sector_t csize;
 	int err;

+	if (page >= bitmap->pages)
+		return NULL;
+
 	err = md_bitmap_checkpage(bitmap, page, create, 0);

 	if (bitmap->bp[page].hijacked ||
......
@@ -3846,35 +3846,51 @@ static int analyze_sbs(struct mddev *mddev)
  */
 int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
 {
-	unsigned long result = 0;
-	long decimals = -1;
-	while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
-		if (*cp == '.')
-			decimals = 0;
-		else if (decimals < scale) {
-			unsigned int value;
-			value = *cp - '0';
-			result = result * 10 + value;
-			if (decimals >= 0)
-				decimals++;
-		}
-		cp++;
-	}
-	if (*cp == '\n')
-		cp++;
-	if (*cp)
-		return -EINVAL;
-	if (decimals < 0)
-		decimals = 0;
-	*res = result * int_pow(10, scale - decimals);
-	return 0;
+	unsigned long result = 0, decimals = 0;
+	char *pos, *str;
+	int rv;
+
+	str = kmemdup_nul(cp, strlen(cp), GFP_KERNEL);
+	if (!str)
+		return -ENOMEM;
+	pos = strchr(str, '.');
+	if (pos) {
+		int cnt = scale;
+
+		*pos = '\0';
+		while (isdigit(*(++pos))) {
+			if (cnt) {
+				decimals = decimals * 10 + *pos - '0';
+				cnt--;
+			}
+		}
+		if (*pos == '\n')
+			pos++;
+		if (*pos) {
+			kfree(str);
+			return -EINVAL;
+		}
+		decimals *= int_pow(10, cnt);
+	}
+
+	rv = kstrtoul(str, 10, &result);
+	kfree(str);
+	if (rv)
+		return rv;
+	if (result > (ULONG_MAX - decimals) / (unsigned int)int_pow(10, scale))
+		return -EINVAL;
+	*res = result * int_pow(10, scale) + decimals;
+	return rv;
 }

 static ssize_t
 safe_delay_show(struct mddev *mddev, char *page)
 {
-	int msec = (mddev->safemode_delay*1000)/HZ;
-	return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
+	unsigned int msec = ((unsigned long)mddev->safemode_delay*1000)/HZ;
+
+	return sprintf(page, "%u.%03u\n", msec/1000, msec%1000);
 }
 static ssize_t
 safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
@@ -3888,10 +3904,14 @@ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)

 	if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
 		return -EINVAL;
+	if (msec > UINT_MAX)
+		return -EINVAL;
+
 	if (msec == 0)
 		mddev->safemode_delay = 0;
 	else {
 		unsigned long old_delay = mddev->safemode_delay;
+		/* HZ <= 1000, so new_delay < UINT_MAX, too */
 		unsigned long new_delay = (msec*HZ)/1000;

 		if (new_delay == 0)
@@ -4543,7 +4563,7 @@ __ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store)
 static ssize_t
 max_corrected_read_errors_show(struct mddev *mddev, char *page) {
-	return sprintf(page, "%d\n",
+	return sprintf(page, "%u\n",
 		atomic_read(&mddev->max_corr_read_errors));
 }
......
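strict_strtoul_scaled() now splits the input at the decimal point, parses the integer part with kstrtoul(), and accumulates at most scale fractional digits before checking that the scaled sum cannot overflow. A worked example with scale = 3, as used by safe_delay_store(): for "0.13", result = 0 and decimals = 13 with cnt = 1 slot unused, so decimals becomes 130 and *res = 0 * 10^3 + 130 = 130; for "20.5", *res = 20 * 10^3 + 500 = 20500. An input whose integer part exceeds (ULONG_MAX - decimals) / 10^scale now fails with -EINVAL instead of silently wrapping as the old open-coded loop could, and the added msec > UINT_MAX check keeps safemode_delay within the range that the "HZ <= 1000" comment relies on.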
@@ -2325,7 +2325,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
 	int sect = 0; /* Offset from r10_bio->sector */
 	int sectors = r10_bio->sectors;
 	struct md_rdev *rdev;
-	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
+	unsigned int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
 	int d = r10_bio->devs[r10_bio->read_slot].devnum;

 	/* still own a reference to this rdev, so it cannot
@@ -2344,7 +2344,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
 		char b[BDEVNAME_SIZE];
 		bdevname(rdev->bdev, b);

-		pr_notice("md/raid10:%s: %s: Raid device exceeded read_error threshold [cur %d:max %d]\n",
+		pr_notice("md/raid10:%s: %s: Raid device exceeded read_error threshold [cur %u:max %u]\n",
			  mdname(mddev), b,
			  atomic_read(&rdev->read_errors), max_read_errors);
		pr_notice("md/raid10:%s: %s: Failing raid device\n",
......
@@ -32,8 +32,6 @@ obj-$(CONFIG_TIMERFD)		+= timerfd.o
 obj-$(CONFIG_EVENTFD)		+= eventfd.o
 obj-$(CONFIG_USERFAULTFD)	+= userfaultfd.o
 obj-$(CONFIG_AIO)		+= aio.o
-obj-$(CONFIG_IO_URING)		+= io_uring.o
-obj-$(CONFIG_IO_WQ)		+= io-wq.o
 obj-$(CONFIG_FS_DAX)		+= dax.o
 obj-$(CONFIG_FS_ENCRYPTION)	+= crypto/
 obj-$(CONFIG_FS_VERITY)		+= verity/
......
@@ -520,7 +520,7 @@ static bool dump_interrupted(void)
 	 * but then we need to teach dump_write() to restart and clear
 	 * TIF_SIGPENDING.
 	 */
-	return signal_pending(current);
+	return fatal_signal_pending(current) || freezing(current);
 }

 static void wait_for_dump_helpers(struct file *file)
......
@@ -23,6 +23,8 @@
 #include <linux/close_range.h>
 #include <net/sock.h>

+#include "internal.h"
+
 unsigned int sysctl_nr_open __read_mostly = 1024*1024;
 unsigned int sysctl_nr_open_min = BITS_PER_LONG;
 /* our min() is unusable in constant expressions ;-/ */
@@ -829,9 +831,8 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
 }

 /*
- * variant of __close_fd that gets a ref on the file for later fput.
- * The caller must ensure that filp_close() called on the file, and then
- * an fput().
+ * See close_fd_get_file() below, this variant assumes current->files->file_lock
+ * is held.
  */
 int __close_fd_get_file(unsigned int fd, struct file **res)
 {
@@ -839,26 +840,39 @@ int __close_fd_get_file(unsigned int fd, struct file **res)
 	struct file *file;
 	struct fdtable *fdt;

-	spin_lock(&files->file_lock);
 	fdt = files_fdtable(files);
 	if (fd >= fdt->max_fds)
-		goto out_unlock;
+		goto out_err;
 	file = fdt->fd[fd];
 	if (!file)
-		goto out_unlock;
+		goto out_err;
 	rcu_assign_pointer(fdt->fd[fd], NULL);
 	__put_unused_fd(files, fd);
-	spin_unlock(&files->file_lock);
 	get_file(file);
 	*res = file;
 	return 0;
-
-out_unlock:
-	spin_unlock(&files->file_lock);
+out_err:
 	*res = NULL;
 	return -ENOENT;
 }

+/*
+ * variant of close_fd that gets a ref on the file for later fput.
+ * The caller must ensure that filp_close() called on the file, and then
+ * an fput().
+ */
+int close_fd_get_file(unsigned int fd, struct file **res)
+{
+	struct files_struct *files = current->files;
+	int ret;
+
+	spin_lock(&files->file_lock);
+	ret = __close_fd_get_file(fd, res);
+	spin_unlock(&files->file_lock);
+	return ret;
+}
+
 void do_close_on_exec(struct files_struct *files)
 {
 	unsigned i;
......
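Splitting the lockless __close_fd_get_file() (file_lock assumed held) from close_fd_get_file() (takes the lock itself) lets a caller that already holds file_lock detach several descriptors inside one critical section. The calling convention stated in the comment above, as a hedged sketch (this mirrors how io_uring's IORING_OP_CLOSE consumes the helper; error handling trimmed):

	static int close_fd_deferred(unsigned int fd)
	{
		struct file *file;
		int ret;

		ret = close_fd_get_file(fd, &file);	/* unhook fd, grab a ref */
		if (ret < 0)
			return ret;
		ret = filp_close(file, current->files);	/* flush, as close(2) does */
		fput(file);				/* drop our reference */
		return ret;
	}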
@@ -77,6 +77,8 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 long do_rmdir(int dfd, struct filename *name);
 long do_unlinkat(int dfd, struct filename *name);
 int may_linkat(struct path *link);
+int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
+		 struct filename *newname, unsigned int flags);

 /*
  * namespace.c
@@ -132,6 +134,7 @@ extern struct file *do_file_open_root(const struct path *,
 			   const char *, const struct open_flags *);
 extern struct open_how build_open_how(int flags, umode_t mode);
 extern int build_open_flags(const struct open_how *how, struct open_flags *op);
+extern int __close_fd_get_file(unsigned int fd, struct file **res);

 long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
 int chmod_common(const struct path *path, umode_t mode);
......
@@ -533,6 +533,8 @@ static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
 	p->stack = p->internal;
 	p->dfd = dfd;
 	p->name = name;
+	p->path.mnt = NULL;
+	p->path.dentry = NULL;
 	p->total_link_count = old ? old->total_link_count : 0;
 	p->saved = old;
 	p->state = 0;
@@ -607,6 +609,8 @@ static void terminate_walk(struct nameidata *nd)
 		rcu_read_unlock();
 	}
 	nd->depth = 0;
+	nd->path.mnt = NULL;
+	nd->path.dentry = NULL;
 }

 /* path_put is needed afterwards regardless of success or failure */
@@ -635,6 +639,11 @@ static inline bool legitimize_path(struct nameidata *nd,
 static bool legitimize_links(struct nameidata *nd)
 {
 	int i;
+	if (unlikely(nd->flags & LOOKUP_CACHED)) {
+		drop_links(nd);
+		nd->depth = 0;
+		return false;
+	}
 	for (i = 0; i < nd->depth; i++) {
 		struct saved *last = nd->stack + i;

 		if (unlikely(!legitimize_path(nd, &last->link, last->seq))) {
@@ -798,6 +807,7 @@ static int complete_walk(struct nameidata *nd)
 		if (!(nd->state & ND_ROOT_PRESET))
 			if (!(nd->flags & LOOKUP_IS_SCOPED))
 				nd->root.mnt = NULL;
+		nd->flags &= ~LOOKUP_CACHED;
 		if (!try_to_unlazy(nd))
 			return -ECHILD;
 	}
@@ -2210,6 +2220,10 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 	int error;
 	const char *s = nd->name->name;

+	/* LOOKUP_CACHED requires RCU, ask caller to retry */
+	if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED)
+		return ERR_PTR(-EAGAIN);
+
 	if (!*s)
 		flags &= ~LOOKUP_RCU;
 	if (flags & LOOKUP_RCU)
@@ -2240,8 +2254,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 	}

 	nd->root.mnt = NULL;
-	nd->path.mnt = NULL;
-	nd->path.dentry = NULL;

 	/* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */
 	if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) {
@@ -4353,8 +4365,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 }
 EXPORT_SYMBOL(vfs_rename);

-static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
-			const char __user *newname, unsigned int flags)
+int do_renameat2(int olddfd, struct filename *from, int newdfd,
+		 struct filename *to, unsigned int flags)
 {
 	struct dentry *old_dentry, *new_dentry;
 	struct dentry *trap;
@@ -4362,32 +4374,30 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
 	struct qstr old_last, new_last;
 	int old_type, new_type;
 	struct inode *delegated_inode = NULL;
-	struct filename *from;
-	struct filename *to;
 	unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
 	bool should_retry = false;
-	int error;
+	int error = -EINVAL;

 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
-		return -EINVAL;
+		goto put_both;

 	if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
 	    (flags & RENAME_EXCHANGE))
-		return -EINVAL;
+		goto put_both;

 	if (flags & RENAME_EXCHANGE)
 		target_flags = 0;

 retry:
-	from = filename_parentat(olddfd, getname(oldname), lookup_flags,
-				&old_path, &old_last, &old_type);
+	from = filename_parentat(olddfd, from, lookup_flags, &old_path,
+					&old_last, &old_type);
 	if (IS_ERR(from)) {
 		error = PTR_ERR(from);
-		goto exit;
+		goto put_new;
 	}

-	to = filename_parentat(newdfd, getname(newname), lookup_flags,
-				&new_path, &new_last, &new_type);
+	to = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
+				&new_type);
 	if (IS_ERR(to)) {
 		error = PTR_ERR(to);
 		goto exit1;
@@ -4480,34 +4490,40 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
 	if (retry_estale(error, lookup_flags))
 		should_retry = true;
 	path_put(&new_path);
-	putname(to);
 exit1:
 	path_put(&old_path);
-	putname(from);
 	if (should_retry) {
 		should_retry = false;
 		lookup_flags |= LOOKUP_REVAL;
 		goto retry;
 	}
-exit:
+put_both:
+	if (!IS_ERR(from))
+		putname(from);
+put_new:
+	if (!IS_ERR(to))
+		putname(to);
 	return error;
 }

 SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 		int, newdfd, const char __user *, newname, unsigned int, flags)
 {
-	return do_renameat2(olddfd, oldname, newdfd, newname, flags);
+	return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
+				flags);
 }

 SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 		int, newdfd, const char __user *, newname)
 {
-	return do_renameat2(olddfd, oldname, newdfd, newname, 0);
+	return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
				0);
 }

 SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
 {
-	return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
+	return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD,
+				getname(newname), 0);
 }

 int readlink_copy(char __user *buffer, int buflen, const char *link)
......
@@ -1099,6 +1099,12 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
 		lookup_flags |= LOOKUP_BENEATH;
 	if (how->resolve & RESOLVE_IN_ROOT)
 		lookup_flags |= LOOKUP_IN_ROOT;
+	if (how->resolve & RESOLVE_CACHED) {
+		/* Don't bother even trying for create/truncate/tmpfile open */
+		if (flags & (O_TRUNC | O_CREAT | O_TMPFILE))
+			return -EAGAIN;
+		lookup_flags |= LOOKUP_CACHED;
+	}
 	op->lookup_flags = lookup_flags;
 	return 0;
......
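RESOLVE_CACHED asks build_open_flags() to set LOOKUP_CACHED, and the path_init() change above rejects that combination with -EAGAIN whenever the walk cannot stay in RCU mode, so the open either completes entirely from the dcache or bounces back for a blocking retry. A hedged userspace sketch of that retry dance (assumes a kernel with this backport and a libc that only exposes openat2 via syscall(2)):

	#include <linux/openat2.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <errno.h>

	static int openat2_cached_first(int dfd, const char *path, __u64 flags)
	{
		struct open_how how = { .flags = flags, .resolve = RESOLVE_CACHED };
		int fd = syscall(SYS_openat2, dfd, path, &how, sizeof(how));

		if (fd < 0 && errno == EAGAIN) {	/* dcache miss: retry blocking */
			how.resolve &= ~RESOLVE_CACHED;
			fd = syscall(SYS_openat2, dfd, path, &how, sizeof(how));
		}
		return fd;
	}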
@@ -70,7 +70,7 @@

 #define EXIT_TO_USER_MODE_WORK						\
 	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
-	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING |			\
+	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
	 ARCH_EXIT_TO_USER_MODE_WORK)

 /**
@@ -260,12 +260,13 @@ static __always_inline void arch_exit_to_user_mode(void) { }
 #endif

 /**
- * arch_do_signal - Architecture specific signal delivery function
+ * arch_do_signal_or_restart - Architecture specific signal delivery function
  * @regs:	Pointer to currents pt_regs
+ * @has_signal:	actual signal to handle
  *
  * Invoked from exit_to_user_mode_loop().
  */
-void arch_do_signal(struct pt_regs *regs);
+void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal);

 /**
  * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit()
......
@@ -16,7 +16,7 @@
 #endif

 #define XFER_TO_GUEST_MODE_WORK						\
-	(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+	(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \
	 _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)

 struct kvm_vcpu;
......
@@ -19,7 +19,7 @@
 /* List of all valid flags for the how->resolve argument: */
 #define VALID_RESOLVE_FLAGS \
 	(RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS | \
-	 RESOLVE_BENEATH | RESOLVE_IN_ROOT)
+	 RESOLVE_BENEATH | RESOLVE_IN_ROOT | RESOLVE_CACHED)

 /* List of all open_how "versions". */
 #define OPEN_HOW_SIZE_VER0	24 /* sizeof first published struct */
......
@@ -124,7 +124,7 @@ extern void __fd_install(struct files_struct *files,
 extern int __close_fd(struct files_struct *files,
		      unsigned int fd);
 extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
-extern int __close_fd_get_file(unsigned int fd, struct file **res);
+extern int close_fd_get_file(unsigned int fd, struct file **res);
 extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
		struct files_struct **new_fdp);
......
@@ -22,6 +22,7 @@ struct io_identity {
 	refcount_t			count;
 };

+#ifdef __GENKSYMS__
 struct io_uring_task {
 	/* submission side */
 	struct xarray		xa;
@@ -33,22 +34,22 @@ struct io_uring_task {
 	atomic_t		in_idle;
 	bool			sqpoll;
 };
+#endif

 #if defined(CONFIG_IO_URING)
 struct sock *io_uring_get_socket(struct file *file);
-void __io_uring_task_cancel(void);
-void __io_uring_files_cancel(struct files_struct *files);
+void __io_uring_cancel(bool cancel_all);
 void __io_uring_free(struct task_struct *tsk);

-static inline void io_uring_task_cancel(void)
+static inline void io_uring_files_cancel(void)
 {
-	if (current->io_uring && !xa_empty(&current->io_uring->xa))
-		__io_uring_task_cancel();
+	if (current->io_uring)
+		__io_uring_cancel(false);
 }
-static inline void io_uring_files_cancel(struct files_struct *files)
+static inline void io_uring_task_cancel(void)
 {
-	if (current->io_uring && !xa_empty(&current->io_uring->xa))
-		__io_uring_files_cancel(files);
+	if (current->io_uring)
+		__io_uring_cancel(true);
 }
 static inline void io_uring_free(struct task_struct *tsk)
 {
@@ -63,7 +64,7 @@ static inline struct sock *io_uring_get_socket(struct file *file)
 static inline void io_uring_task_cancel(void)
 {
 }
-static inline void io_uring_files_cancel(struct files_struct *files)
+static inline void io_uring_files_cancel(void)
 {
 }
 static inline void io_uring_free(struct task_struct *tsk)
......
@@ -43,6 +43,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
 #define LOOKUP_NO_XDEV		0x040000 /* No mountpoint crossing. */
 #define LOOKUP_BENEATH		0x080000 /* No escaping from starting point. */
 #define LOOKUP_IN_ROOT		0x100000 /* Treat dirfd as fs root. */
+#define LOOKUP_CACHED		0x200000 /* Only do cached lookup */

 /* LOOKUP_* flags which do scope-related checks based on the dirfd. */
 #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT)
......
@@ -1399,7 +1399,7 @@ struct task_struct {
 	 */
 	randomized_struct_fields_end

-	KABI_RESERVE(1)
+	KABI_USE(1, void *pf_io_worker)
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
......
@@ -360,11 +360,23 @@ static inline int restart_syscall(void)
 	return -ERESTARTNOINTR;
 }

-static inline int signal_pending(struct task_struct *p)
+static inline int task_sigpending(struct task_struct *p)
 {
 	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
 }

+static inline int signal_pending(struct task_struct *p)
+{
+	/*
+	 * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same
+	 * behavior in terms of ensuring that we break out of wait loops
+	 * so that notify signal callbacks can be processed.
+	 */
+	if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL)))
+		return 1;
+	return task_sigpending(p);
+}
+
 static inline int __fatal_signal_pending(struct task_struct *p)
 {
 	return unlikely(sigismember(&p->pending.signal, SIGKILL));
@@ -372,7 +384,7 @@ static inline int __fatal_signal_pending(struct task_struct *p)

 static inline int fatal_signal_pending(struct task_struct *p)
 {
-	return signal_pending(p) && __fatal_signal_pending(p);
+	return task_sigpending(p) && __fatal_signal_pending(p);
 }

 static inline int signal_pending_state(long state, struct task_struct *p)
@@ -509,7 +521,7 @@ extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize);
 static inline void restore_saved_sigmask_unless(bool interrupted)
 {
 	if (interrupted)
-		WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+		WARN_ON(!signal_pending(current));
 	else
 		restore_saved_sigmask();
 }
......
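The task_sigpending()/signal_pending() split is the heart of the TIF_NOTIFY_SIGNAL scheme: signal_pending() now also fires for notifications so that interruptible wait loops are broken for TWA_SIGNAL task_work, while fatal_signal_pending() keeps meaning that a real SIGKILL is queued. An illustrative wait loop (not from this patch) showing both sides:

	static int wait_for_done(struct completion *done)
	{
		while (!try_wait_for_completion(done)) {
			/* true for queued signals *and* TIF_NOTIFY_SIGNAL,
			 * so pending task_work can interrupt the wait */
			if (signal_pending(current))
				return -ERESTARTSYS;
			schedule_timeout_interruptible(HZ / 10);
		}
		return 0;
	}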
@@ -31,6 +31,7 @@ struct kernel_clone_args {
 	/* Number of elements in *set_tid */
 	size_t set_tid_size;
 	int cgroup;
+	int io_thread;
 	struct cgroup *cgrp;
 	struct css_set *cset;
 };
@@ -85,6 +86,7 @@ extern void exit_files(struct task_struct *);
 extern void exit_itimers(struct task_struct *);

 extern pid_t kernel_clone(struct kernel_clone_args *kargs);
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
 struct task_struct *fork_idle(int);
 struct mm_struct *copy_init_mm(void);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
......
@@ -421,6 +421,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags,
 			struct sockaddr __user *upeer_sockaddr,
 			 int __user *upeer_addrlen, int flags,
 			 unsigned long nofile);
+extern struct file *do_accept(struct file *file, unsigned file_flags,
+			      struct sockaddr __user *upeer_sockaddr,
+			      int __user *upeer_addrlen, int flags);
 extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
 			 int __user *upeer_addrlen, int flags);
 extern int __sys_socket(int family, int type, int protocol);
@@ -436,5 +439,6 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
 			     int __user *usockaddr_len);
 extern int __sys_socketpair(int family, int type, int protocol,
 			    int __user *usockvec);
+extern int __sys_shutdown_sock(struct socket *sock, int how);
 extern int __sys_shutdown(int fd, int how);
 #endif /* _LINUX_SOCKET_H */
......
@@ -22,6 +22,8 @@ enum task_work_notify_mode {
 int task_work_add(struct task_struct *task, struct callback_head *twork,
 			enum task_work_notify_mode mode);

+struct callback_head *task_work_cancel_match(struct task_struct *task,
+	bool (*match)(struct callback_head *, void *data), void *data);
 struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
 void task_work_run(void);
......
@@ -202,4 +202,27 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
 }

+/*
+ * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This
+ * is currently used by TWA_SIGNAL based task_work, which requires breaking
+ * wait loops to ensure that task_work is noticed and run.
+ */
+static inline void tracehook_notify_signal(void)
+{
+	clear_thread_flag(TIF_NOTIFY_SIGNAL);
+	smp_mb__after_atomic();
+	if (current->task_works)
+		task_work_run();
+}
+
+/*
+ * Called when we have work to process from exit_to_user_mode_loop()
+ */
+static inline void set_notify_signal(struct task_struct *task)
+{
+	if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
+	    !wake_up_state(task, TASK_INTERRUPTIBLE))
+		kick_process(task);
+}
+
 #endif	/* <linux/tracehook.h> */
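set_notify_signal() is the producer half and tracehook_notify_signal() the consumer half of TWA_SIGNAL delivery: the producer sets TIF_NOTIFY_SIGNAL and wakes (or kicks) the target, whose exit-to-user path then clears the flag and runs its task_work. A hedged sketch of the producer side, assuming the 5.10-era task_work_add() return convention:

	/* Queue work that must run in the target task's context even if
	 * it is blocked in an interruptible wait. */
	static int queue_urgent_work(struct task_struct *task,
				     struct callback_head *cb)
	{
		/* TWA_SIGNAL ends up in set_notify_signal(): the flag is set,
		 * signal_pending() breaks the target's wait loop, and
		 * tracehook_notify_signal() runs cb on return to user mode. */
		return task_work_add(task, cb, TWA_SIGNAL); /* -ESRCH if exiting */
	}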
@@ -26,6 +26,12 @@ enum iter_type {
 	ITER_DISCARD = 64,
 };

+struct iov_iter_state {
+	size_t iov_offset;
+	size_t count;
+	unsigned long nr_segs;
+};
+
 struct iov_iter {
 	/*
 	 * Bit 0 is the read/write bit, set if we're writing.
@@ -55,6 +61,14 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i)
 	return i->type & ~(READ | WRITE);
 }

+static inline void iov_iter_save_state(struct iov_iter *iter,
+				       struct iov_iter_state *state)
+{
+	state->iov_offset = iter->iov_offset;
+	state->count = iter->count;
+	state->nr_segs = iter->nr_segs;
+}
+
 static inline bool iter_is_iovec(const struct iov_iter *i)
 {
 	return iov_iter_type(i) == ITER_IOVEC;
@@ -226,6 +240,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
+void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);

 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
......
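iov_iter_save_state()/iov_iter_restore() snapshot just the three fields a failed non-blocking attempt may have consumed, which is much cheaper than duplicating the whole iterator. A hedged usage sketch (this is the pattern io_uring's read/write retry paths are built on):

	static ssize_t read_retry(struct file *file, struct kiocb *kiocb,
				  struct iov_iter *iter)
	{
		struct iov_iter_state state;
		ssize_t ret;

		iov_iter_save_state(iter, &state);
		ret = call_read_iter(file, kiocb, iter);
		if (ret == -EAGAIN) {
			/* the attempt may have advanced the iterator;
			 * rewind so the blocking retry sees the full range */
			iov_iter_restore(iter, &state);
			ret = call_read_iter(file, kiocb, iter);
		}
		return ret;
	}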
@@ -1334,4 +1334,11 @@ static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
 	return res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb);
 }

+/* Make sure qdisc is no longer in SCHED state. */
+static inline void qdisc_synchronize(const struct Qdisc *q)
+{
+	while (test_bit(__QDISC_STATE_SCHED, &q->state))
+		msleep(1);
+}
+
 #endif
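qdisc_synchronize() closes a teardown race: a qdisc can still be referenced by the transmit softirq while __QDISC_STATE_SCHED is set, so resetting or freeing it at that point risks a use-after-free. A hedged sketch of the intended deactivate-then-reset ordering (names illustrative, not from the patch):

	static void teardown_qdisc(struct net_device *dev, struct Qdisc *qdisc)
	{
		dev_deactivate(dev);       /* unhook; no new scheduling */
		qdisc_synchronize(qdisc);  /* wait out a pending SCHED run */
		qdisc_reset(qdisc);        /* now safe to reset/free */
	}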
@@ -49,7 +49,7 @@ TRACE_EVENT(io_uring_create,
 );

 /**
- * io_uring_register - called after a buffer/file/eventfd was succesfully
+ * io_uring_register - called after a buffer/file/eventfd was successfully
  *			registered for a ring
  *
  * @ctx:		pointer to a ring context structure
@@ -290,38 +290,42 @@ TRACE_EVENT(io_uring_fail_link,
  * @ctx:		pointer to a ring context structure
  * @user_data:		user data associated with the request
  * @res:		result of the request
+ * @cflags:		completion flags
  *
  */
 TRACE_EVENT(io_uring_complete,

-	TP_PROTO(void *ctx, u64 user_data, long res),
+	TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags),

-	TP_ARGS(ctx, user_data, res),
+	TP_ARGS(ctx, user_data, res, cflags),

 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
 		__field(  u64,		user_data	)
-		__field(  long,		res		)
+		__field(  int,		res		)
+		__field(  unsigned,	cflags		)
 	),

 	TP_fast_assign(
 		__entry->ctx		= ctx;
 		__entry->user_data	= user_data;
 		__entry->res		= res;
+		__entry->cflags		= cflags;
 	),

-	TP_printk("ring %p, user_data 0x%llx, result %ld",
+	TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x",
 		  __entry->ctx, (unsigned long long)__entry->user_data,
-		  __entry->res)
+		  __entry->res, __entry->cflags)
 );
 /**
  * io_uring_submit_sqe - called before submitting one SQE
  *
  * @ctx:		pointer to a ring context structure
+ * @req:		pointer to a submitted request
  * @opcode:		opcode of request
  * @user_data:		user data associated with the request
+ * @flags		request flags
  * @force_nonblock:	whether a context blocking or not
  * @sq_thread:		true if sq_thread has submitted this SQE
  *
@@ -330,41 +334,60 @@ TRACE_EVENT(io_uring_complete,
  */
 TRACE_EVENT(io_uring_submit_sqe,

-	TP_PROTO(void *ctx, u8 opcode, u64 user_data, bool force_nonblock,
-		 bool sq_thread),
+	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, u32 flags,
+		 bool force_nonblock, bool sq_thread),

-	TP_ARGS(ctx, opcode, user_data, force_nonblock, sq_thread),
+	TP_ARGS(ctx, req, opcode, user_data, flags, force_nonblock, sq_thread),

 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
+		__field(  void *,	req		)
 		__field(  u8,		opcode		)
 		__field(  u64,		user_data	)
+		__field(  u32,		flags		)
 		__field(  bool,		force_nonblock	)
 		__field(  bool,		sq_thread	)
 	),

 	TP_fast_assign(
 		__entry->ctx		= ctx;
+		__entry->req		= req;
 		__entry->opcode		= opcode;
 		__entry->user_data	= user_data;
+		__entry->flags		= flags;
 		__entry->force_nonblock	= force_nonblock;
 		__entry->sq_thread	= sq_thread;
 	),

-	TP_printk("ring %p, op %d, data 0x%llx, non block %d, sq_thread %d",
-		  __entry->ctx, __entry->opcode,
-		  (unsigned long long) __entry->user_data,
-		  __entry->force_nonblock, __entry->sq_thread)
+	TP_printk("ring %p, req %p, op %d, data 0x%llx, flags %u, "
+		  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
+		  __entry->opcode, (unsigned long long)__entry->user_data,
+		  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
 );
+/*
+ * io_uring_poll_arm - called after arming a poll wait if successful
+ *
+ * @ctx:		pointer to a ring context structure
+ * @req:		pointer to the armed request
+ * @opcode:		opcode of request
+ * @user_data:		user data associated with the request
+ * @mask:		request poll events mask
+ * @events:		registered events of interest
+ *
+ * Allows to track which fds are waiting for and what are the events of
+ * interest.
+ */
 TRACE_EVENT(io_uring_poll_arm,

-	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events),
+	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
+		 int mask, int events),

-	TP_ARGS(ctx, opcode, user_data, mask, events),
+	TP_ARGS(ctx, req, opcode, user_data, mask, events),

 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
+		__field(  void *,	req		)
 		__field(  u8,		opcode		)
 		__field(  u64,		user_data	)
 		__field(  int,		mask		)
@@ -373,14 +396,15 @@ TRACE_EVENT(io_uring_poll_arm,

 	TP_fast_assign(
 		__entry->ctx		= ctx;
+		__entry->req		= req;
 		__entry->opcode		= opcode;
 		__entry->user_data	= user_data;
 		__entry->mask		= mask;
 		__entry->events		= events;
 	),

-	TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
-		  __entry->ctx, __entry->opcode,
+	TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
+		  __entry->ctx, __entry->req, __entry->opcode,
 		  (unsigned long long) __entry->user_data,
 		  __entry->mask, __entry->events)
 );
...@@ -437,26 +461,39 @@ TRACE_EVENT(io_uring_task_add, ...@@ -437,26 +461,39 @@ TRACE_EVENT(io_uring_task_add,
__entry->mask) __entry->mask)
); );
/*
* io_uring_task_run - called when task_work_run() executes the poll events
* notification callbacks
*
* @ctx: pointer to a ring context structure
* @req: pointer to the armed request
* @opcode: opcode of request
* @user_data: user data associated with the request
*
 * Allows tracking when notified poll events are processed.
*/
TRACE_EVENT(io_uring_task_run, TRACE_EVENT(io_uring_task_run,
TP_PROTO(void *ctx, u8 opcode, u64 user_data), TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data),
TP_ARGS(ctx, opcode, user_data), TP_ARGS(ctx, req, opcode, user_data),
TP_STRUCT__entry ( TP_STRUCT__entry (
__field( void *, ctx ) __field( void *, ctx )
__field( void *, req )
__field( u8, opcode ) __field( u8, opcode )
__field( u64, user_data ) __field( u64, user_data )
), ),
TP_fast_assign( TP_fast_assign(
__entry->ctx = ctx; __entry->ctx = ctx;
__entry->req = req;
__entry->opcode = opcode; __entry->opcode = opcode;
__entry->user_data = user_data; __entry->user_data = user_data;
), ),
TP_printk("ring %p, op %d, data 0x%llx", TP_printk("ring %p, req %p, op %d, data 0x%llx",
__entry->ctx, __entry->opcode, __entry->ctx, __entry->req, __entry->opcode,
(unsigned long long) __entry->user_data) (unsigned long long) __entry->user_data)
); );
......
...@@ -42,10 +42,11 @@ struct io_uring_sqe { ...@@ -42,10 +42,11 @@ struct io_uring_sqe {
__u32 statx_flags; __u32 statx_flags;
__u32 fadvise_advice; __u32 fadvise_advice;
__u32 splice_flags; __u32 splice_flags;
__u32 rename_flags;
__u32 unlink_flags;
__u32 hardlink_flags;
}; };
__u64 user_data; /* data to be passed back at completion time */ __u64 user_data; /* data to be passed back at completion time */
union {
struct {
/* pack this to avoid bogus arm OABI complaints */ /* pack this to avoid bogus arm OABI complaints */
union { union {
/* index into fixed buffers, if used */ /* index into fixed buffers, if used */
...@@ -55,10 +56,11 @@ struct io_uring_sqe { ...@@ -55,10 +56,11 @@ struct io_uring_sqe {
} __attribute__((packed)); } __attribute__((packed));
/* personality to use, if used */ /* personality to use, if used */
__u16 personality; __u16 personality;
union {
__s32 splice_fd_in; __s32 splice_fd_in;
__u32 file_index;
}; };
__u64 __pad2[3]; __u64 __pad2[2];
};
}; };
enum { enum {
...@@ -132,6 +134,9 @@ enum { ...@@ -132,6 +134,9 @@ enum {
IORING_OP_PROVIDE_BUFFERS, IORING_OP_PROVIDE_BUFFERS,
IORING_OP_REMOVE_BUFFERS, IORING_OP_REMOVE_BUFFERS,
IORING_OP_TEE, IORING_OP_TEE,
IORING_OP_SHUTDOWN,
IORING_OP_RENAMEAT,
IORING_OP_UNLINKAT,
/* this goes last, obviously */ /* this goes last, obviously */
IORING_OP_LAST, IORING_OP_LAST,
...@@ -146,13 +151,33 @@ enum { ...@@ -146,13 +151,33 @@ enum {
* sqe->timeout_flags * sqe->timeout_flags
*/ */
#define IORING_TIMEOUT_ABS (1U << 0) #define IORING_TIMEOUT_ABS (1U << 0)
#define IORING_TIMEOUT_UPDATE (1U << 1)
#define IORING_TIMEOUT_BOOTTIME (1U << 2)
#define IORING_TIMEOUT_REALTIME (1U << 3)
#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
/* /*
* sqe->splice_flags * sqe->splice_flags
* extends splice(2) flags * extends splice(2) flags
*/ */
#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */ #define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
/*
* POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
* command flags for POLL_ADD are stored in sqe->len.
*
* IORING_POLL_ADD_MULTI Multishot poll. Sets IORING_CQE_F_MORE if
* the poll handler will continue to report
* CQEs on behalf of the same SQE.
*
* IORING_POLL_UPDATE Update existing poll request, matching
* sqe->addr as the old user_data field.
*/
#define IORING_POLL_ADD_MULTI (1U << 0)
#define IORING_POLL_UPDATE_EVENTS (1U << 1)
#define IORING_POLL_UPDATE_USER_DATA (1U << 2)
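For illustration, a hedged userspace sketch (not part of this diff) of arming a multishot poll: the command flag goes in sqe->len as the comment above notes, and completions carry IORING_CQE_F_MORE for as long as the poll stays armed. Using poll32_events assumes the kernel advertises IORING_FEAT_POLL_32BITS.

#include <linux/io_uring.h>
#include <poll.h>
#include <string.h>

/* Prepare (but do not submit) a multishot POLL_ADD SQE; obtaining a
 * free SQE and the ring setup are assumed to happen elsewhere. */
static void prep_poll_multishot(struct io_uring_sqe *sqe, int fd)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_POLL_ADD;
	sqe->fd = fd;
	sqe->poll32_events = POLLIN;	  /* events of interest */
	sqe->len = IORING_POLL_ADD_MULTI; /* POLL_ADD flags live in sqe->len */
	sqe->user_data = 0x1234;	  /* echoed back in every CQE */
}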
/* /*
* IO completion data structure (Completion Queue Entry) * IO completion data structure (Completion Queue Entry)
*/ */
...@@ -166,8 +191,10 @@ struct io_uring_cqe { ...@@ -166,8 +191,10 @@ struct io_uring_cqe {
* cqe->flags * cqe->flags
* *
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
* IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
*/ */
#define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1)
enum { enum {
IORING_CQE_BUFFER_SHIFT = 16, IORING_CQE_BUFFER_SHIFT = 16,
...@@ -226,6 +253,7 @@ struct io_cqring_offsets { ...@@ -226,6 +253,7 @@ struct io_cqring_offsets {
#define IORING_ENTER_GETEVENTS (1U << 0) #define IORING_ENTER_GETEVENTS (1U << 0)
#define IORING_ENTER_SQ_WAKEUP (1U << 1) #define IORING_ENTER_SQ_WAKEUP (1U << 1)
#define IORING_ENTER_SQ_WAIT (1U << 2) #define IORING_ENTER_SQ_WAIT (1U << 2)
#define IORING_ENTER_EXT_ARG (1U << 3)
/* /*
* Passed in for io_uring_setup(2). Copied back with updated info on success * Passed in for io_uring_setup(2). Copied back with updated info on success
...@@ -253,6 +281,10 @@ struct io_uring_params { ...@@ -253,6 +281,10 @@ struct io_uring_params {
#define IORING_FEAT_CUR_PERSONALITY (1U << 4) #define IORING_FEAT_CUR_PERSONALITY (1U << 4)
#define IORING_FEAT_FAST_POLL (1U << 5) #define IORING_FEAT_FAST_POLL (1U << 5)
#define IORING_FEAT_POLL_32BITS (1U << 6) #define IORING_FEAT_POLL_32BITS (1U << 6)
#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
#define IORING_FEAT_EXT_ARG (1U << 8)
#define IORING_FEAT_NATIVE_WORKERS (1U << 9)
#define IORING_FEAT_RSRC_TAGS (1U << 10)
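A hedged sketch (not part of this diff) of probing the new feature bits: io_uring_setup(2) copies the supported feature mask back in io_uring_params.features. The raw syscall keeps the example self-contained; whether __NR_io_uring_setup is defined depends on the libc headers in use.

#include <linux/io_uring.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Returns 1 if the running kernel advertises IORING_FEAT_EXT_ARG,
 * 0 if not, -1 on setup failure. */
static int ring_supports_ext_arg(void)
{
	struct io_uring_params p;
	int fd;

	memset(&p, 0, sizeof(p));
	fd = syscall(__NR_io_uring_setup, 8, &p);
	if (fd < 0)
		return -1;
	close(fd);
	return (p.features & IORING_FEAT_EXT_ARG) != 0;
}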
/* /*
* io_uring_register(2) opcodes and arguments * io_uring_register(2) opcodes and arguments
...@@ -272,16 +304,62 @@ enum { ...@@ -272,16 +304,62 @@ enum {
IORING_REGISTER_RESTRICTIONS = 11, IORING_REGISTER_RESTRICTIONS = 11,
IORING_REGISTER_ENABLE_RINGS = 12, IORING_REGISTER_ENABLE_RINGS = 12,
/* extended with tagging */
IORING_REGISTER_FILES2 = 13,
IORING_REGISTER_FILES_UPDATE2 = 14,
IORING_REGISTER_BUFFERS2 = 15,
IORING_REGISTER_BUFFERS_UPDATE = 16,
/* set/clear io-wq thread affinities */
IORING_REGISTER_IOWQ_AFF = 17,
IORING_UNREGISTER_IOWQ_AFF = 18,
/* set/get max number of io-wq workers */
IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
/* this goes last */ /* this goes last */
IORING_REGISTER_LAST IORING_REGISTER_LAST
}; };
/* io-wq worker categories */
enum {
IO_WQ_BOUND,
IO_WQ_UNBOUND,
};
/* deprecated, see struct io_uring_rsrc_update */
struct io_uring_files_update { struct io_uring_files_update {
__u32 offset; __u32 offset;
__u32 resv; __u32 resv;
__aligned_u64 /* __s32 * */ fds; __aligned_u64 /* __s32 * */ fds;
}; };
struct io_uring_rsrc_register {
__u32 nr;
__u32 resv;
__u64 resv2;
__aligned_u64 data;
__aligned_u64 tags;
};
struct io_uring_rsrc_update {
__u32 offset;
__u32 resv;
__aligned_u64 data;
};
struct io_uring_rsrc_update2 {
__u32 offset;
__u32 resv;
__aligned_u64 data;
__aligned_u64 tags;
__u32 nr;
__u32 resv2;
};
/* Skip updating fd indexes set to this value in the fd table */
#define IORING_REGISTER_FILES_SKIP (-2)
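A hedged userspace sketch (not part of this diff) of the tagged variant: IORING_REGISTER_FILES2 takes a struct io_uring_rsrc_register, and the size of that struct is passed as the nr_args argument of io_uring_register(2).

#include <linux/io_uring.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Register nr fixed files with per-file tags on an existing ring. */
static int register_files2(int ring_fd, const int *fds, const __u64 *tags,
			   unsigned int nr)
{
	struct io_uring_rsrc_register reg;

	memset(&reg, 0, sizeof(reg));
	reg.nr = nr;
	reg.data = (__u64)(unsigned long)fds;
	reg.tags = (__u64)(unsigned long)tags;
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_FILES2, &reg, sizeof(reg));
}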
#define IO_URING_OP_SUPPORTED (1U << 0) #define IO_URING_OP_SUPPORTED (1U << 0)
struct io_uring_probe_op { struct io_uring_probe_op {
...@@ -329,4 +407,11 @@ enum { ...@@ -329,4 +407,11 @@ enum {
IORING_RESTRICTION_LAST IORING_RESTRICTION_LAST
}; };
struct io_uring_getevents_arg {
__u64 sigmask;
__u32 sigmask_sz;
__u32 pad;
__u64 ts;
};
#endif #endif
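A hedged sketch (not part of this diff) of consuming IORING_ENTER_EXT_ARG together with struct io_uring_getevents_arg: when the flag is set, the final io_uring_enter(2) argument becomes the size of the extended-argument struct rather than a sigset size.

#include <linux/io_uring.h>
#include <linux/time_types.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Wait for one CQE with a relative timeout and no signal mask. */
static int wait_cqe_timeout(int ring_fd, struct __kernel_timespec *ts)
{
	struct io_uring_getevents_arg arg;

	memset(&arg, 0, sizeof(arg));
	arg.ts = (__u64)(unsigned long)ts;
	return syscall(__NR_io_uring_enter, ring_fd, 0, 1,
		       IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG,
		       &arg, sizeof(arg));
}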
...@@ -35,5 +35,9 @@ struct open_how { ...@@ -35,5 +35,9 @@ struct open_how {
#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." #define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
be scoped inside the dirfd be scoped inside the dirfd
(similar to chroot(2)). */ (similar to chroot(2)). */
#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be
completed through cached lookup. May
return -EAGAIN if that's not
possible. */
#endif /* _UAPI_LINUX_OPENAT2_H */ #endif /* _UAPI_LINUX_OPENAT2_H */
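A hedged sketch (not part of this diff) of the intended RESOLVE_CACHED pattern: attempt the cached-only lookup first, then retry with blocking resolution allowed if the kernel returns -EAGAIN.

#include <errno.h>
#include <fcntl.h>
#include <linux/openat2.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Open path read-only, preferring a lookup that never blocks. */
static int open_cached_first(const char *path)
{
	struct open_how how = { .flags = O_RDONLY, .resolve = RESOLVE_CACHED };
	int fd = syscall(__NR_openat2, AT_FDCWD, path, &how, sizeof(how));

	if (fd < 0 && errno == EAGAIN) {
		how.resolve = 0;	/* fall back to a blocking lookup */
		fd = syscall(__NR_openat2, AT_FDCWD, path, &how, sizeof(how));
	}
	return fd;
}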
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for io_uring
obj-$(CONFIG_IO_URING) += io_uring.o
obj-$(CONFIG_IO_WQ) += io-wq.o
This diff is collapsed.
#ifndef INTERNAL_IO_WQ_H #ifndef INTERNAL_IO_WQ_H
#define INTERNAL_IO_WQ_H #define INTERNAL_IO_WQ_H
#include <linux/refcount.h>
#include <linux/io_uring.h> #include <linux/io_uring.h>
struct io_wq; struct io_wq;
enum { enum {
IO_WQ_WORK_CANCEL = 1, IO_WQ_WORK_CANCEL = 1,
IO_WQ_WORK_HASHED = 2, IO_WQ_WORK_HASHED = 2,
IO_WQ_WORK_UNBOUND = 4, IO_WQ_WORK_UNBOUND = 4,
IO_WQ_WORK_NO_CANCEL = 8,
IO_WQ_WORK_CONCURRENT = 16, IO_WQ_WORK_CONCURRENT = 16,
IO_WQ_WORK_FILES = 32,
IO_WQ_WORK_FS = 64,
IO_WQ_WORK_MM = 128,
IO_WQ_WORK_CREDS = 256,
IO_WQ_WORK_BLKCG = 512,
IO_WQ_WORK_FSIZE = 1024,
IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
}; };
...@@ -52,6 +44,7 @@ static inline void wq_list_add_after(struct io_wq_work_node *node, ...@@ -52,6 +44,7 @@ static inline void wq_list_add_after(struct io_wq_work_node *node,
static inline void wq_list_add_tail(struct io_wq_work_node *node, static inline void wq_list_add_tail(struct io_wq_work_node *node,
struct io_wq_work_list *list) struct io_wq_work_list *list)
{ {
node->next = NULL;
if (!list->first) { if (!list->first) {
list->last = node; list->last = node;
WRITE_ONCE(list->first, node); WRITE_ONCE(list->first, node);
...@@ -59,7 +52,6 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node, ...@@ -59,7 +52,6 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
list->last->next = node; list->last->next = node;
list->last = node; list->last = node;
} }
node->next = NULL;
} }
static inline void wq_list_cut(struct io_wq_work_list *list, static inline void wq_list_cut(struct io_wq_work_list *list,
...@@ -95,7 +87,6 @@ static inline void wq_list_del(struct io_wq_work_list *list, ...@@ -95,7 +87,6 @@ static inline void wq_list_del(struct io_wq_work_list *list,
struct io_wq_work { struct io_wq_work {
struct io_wq_work_node list; struct io_wq_work_node list;
struct io_identity *identity;
unsigned flags; unsigned flags;
}; };
...@@ -107,37 +98,48 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) ...@@ -107,37 +98,48 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
return container_of(work->list.next, struct io_wq_work, list); return container_of(work->list.next, struct io_wq_work, list);
} }
typedef void (free_work_fn)(struct io_wq_work *); typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *); typedef void (io_wq_work_fn)(struct io_wq_work *);
struct io_wq_data { struct io_wq_hash {
struct user_struct *user; refcount_t refs;
unsigned long map;
struct wait_queue_head wait;
};
static inline void io_wq_put_hash(struct io_wq_hash *hash)
{
if (refcount_dec_and_test(&hash->refs))
kfree(hash);
}
struct io_wq_data {
struct io_wq_hash *hash;
struct task_struct *task;
io_wq_work_fn *do_work; io_wq_work_fn *do_work;
free_work_fn *free_work; free_work_fn *free_work;
}; };
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
bool io_wq_get(struct io_wq *wq, struct io_wq_data *data); void io_wq_exit_start(struct io_wq *wq);
void io_wq_destroy(struct io_wq *wq); void io_wq_put_and_exit(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val); void io_wq_hash_work(struct io_wq_work *work, void *val);
int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
int io_wq_max_workers(struct io_wq *wq, int *new_count);
static inline bool io_wq_is_hashed(struct io_wq_work *work) static inline bool io_wq_is_hashed(struct io_wq_work *work)
{ {
return work->flags & IO_WQ_WORK_HASHED; return work->flags & IO_WQ_WORK_HASHED;
} }
void io_wq_cancel_all(struct io_wq *wq);
typedef bool (work_cancel_fn)(struct io_wq_work *, void *); typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
void *data, bool cancel_all); void *data, bool cancel_all);
struct task_struct *io_wq_get_task(struct io_wq *wq);
#if defined(CONFIG_IO_WQ) #if defined(CONFIG_IO_WQ)
extern void io_wq_worker_sleeping(struct task_struct *); extern void io_wq_worker_sleeping(struct task_struct *);
extern void io_wq_worker_running(struct task_struct *); extern void io_wq_worker_running(struct task_struct *);
...@@ -152,6 +154,7 @@ static inline void io_wq_worker_running(struct task_struct *tsk) ...@@ -152,6 +154,7 @@ static inline void io_wq_worker_running(struct task_struct *tsk)
static inline bool io_wq_current_is_worker(void) static inline bool io_wq_current_is_worker(void)
{ {
return in_task() && (current->flags & PF_IO_WORKER); return in_task() && (current->flags & PF_IO_WORKER) &&
current->pf_io_worker;
} }
#endif #endif
...@@ -135,7 +135,15 @@ static __always_inline void exit_to_user_mode(void) ...@@ -135,7 +135,15 @@ static __always_inline void exit_to_user_mode(void)
} }
/* Workaround to allow gradual conversion of architecture code */ /* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal(struct pt_regs *regs) { } void __weak arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { }
static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work)
{
if (ti_work & _TIF_NOTIFY_SIGNAL)
tracehook_notify_signal();
arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING);
}
static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
unsigned long ti_work) unsigned long ti_work)
...@@ -157,8 +165,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, ...@@ -157,8 +165,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
if (ti_work & _TIF_PATCH_PENDING) if (ti_work & _TIF_PATCH_PENDING)
klp_update_patch_state(current); klp_update_patch_state(current);
if (ti_work & _TIF_SIGPENDING) if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
arch_do_signal(regs); handle_signal_work(regs, ti_work);
if (ti_work & _TIF_NOTIFY_RESUME) { if (ti_work & _TIF_NOTIFY_RESUME) {
tracehook_notify_resume(regs); tracehook_notify_resume(regs);
......
...@@ -8,6 +8,9 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) ...@@ -8,6 +8,9 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
do { do {
int ret; int ret;
if (ti_work & _TIF_NOTIFY_SIGNAL)
tracehook_notify_signal();
if (ti_work & _TIF_SIGPENDING) { if (ti_work & _TIF_SIGPENDING) {
kvm_handle_signal_exit(vcpu); kvm_handle_signal_exit(vcpu);
return -EINTR; return -EINTR;
......
...@@ -1973,7 +1973,7 @@ bool uprobe_deny_signal(void) ...@@ -1973,7 +1973,7 @@ bool uprobe_deny_signal(void)
WARN_ON_ONCE(utask->state != UTASK_SSTEP); WARN_ON_ONCE(utask->state != UTASK_SSTEP);
if (signal_pending(t)) { if (task_sigpending(t)) {
spin_lock_irq(&t->sighand->siglock); spin_lock_irq(&t->sighand->siglock);
clear_tsk_thread_flag(t, TIF_SIGPENDING); clear_tsk_thread_flag(t, TIF_SIGPENDING);
spin_unlock_irq(&t->sighand->siglock); spin_unlock_irq(&t->sighand->siglock);
......
...@@ -763,7 +763,7 @@ void __noreturn do_exit(long code) ...@@ -763,7 +763,7 @@ void __noreturn do_exit(long code)
schedule(); schedule();
} }
io_uring_files_cancel(tsk->files); io_uring_files_cancel();
exit_signals(tsk); /* sets PF_EXITING */ exit_signals(tsk); /* sets PF_EXITING */
/* sync mm's RSS info before statistics gathering */ /* sync mm's RSS info before statistics gathering */
......
...@@ -948,6 +948,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) ...@@ -948,6 +948,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->splice_pipe = NULL; tsk->splice_pipe = NULL;
tsk->task_frag.page = NULL; tsk->task_frag.page = NULL;
tsk->wake_q.next = NULL; tsk->wake_q.next = NULL;
tsk->pf_io_worker = NULL;
account_kernel_stack(tsk, 1); account_kernel_stack(tsk, 1);
...@@ -1994,6 +1995,8 @@ static __latent_entropy struct task_struct *copy_process( ...@@ -1994,6 +1995,8 @@ static __latent_entropy struct task_struct *copy_process(
p = dup_task_struct(current, node); p = dup_task_struct(current, node);
if (!p) if (!p)
goto fork_out; goto fork_out;
if (args->io_thread)
p->flags |= PF_IO_WORKER;
/* /*
* This _must_ happen before we call free_task(), i.e. before we jump * This _must_ happen before we call free_task(), i.e. before we jump
...@@ -2474,6 +2477,34 @@ struct mm_struct *copy_init_mm(void) ...@@ -2474,6 +2477,34 @@ struct mm_struct *copy_init_mm(void)
return dup_mm(NULL, &init_mm); return dup_mm(NULL, &init_mm);
} }
/*
* This is like kernel_clone(), but shaved down and tailored to just
* creating io_uring workers. It returns a created task, or an error pointer.
* The returned task is inactive, and the caller must fire it up through
* wake_up_new_task(p). All signals are blocked in the created task.
*/
struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
{
unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
CLONE_IO;
struct kernel_clone_args args = {
.flags = ((lower_32_bits(flags) | CLONE_VM |
CLONE_UNTRACED) & ~CSIGNAL),
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
.stack = (unsigned long)fn,
.stack_size = (unsigned long)arg,
.io_thread = 1,
};
struct task_struct *tsk;
tsk = copy_process(NULL, 0, node, &args);
if (!IS_ERR(tsk)) {
sigfillset(&tsk->blocked);
sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
}
return tsk;
}
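A hedged kernel-side sketch (not part of this diff) of the calling convention the comment above describes; worker_fn and data are illustrative names.

/* Create and start one io worker thread on the given NUMA node. */
static int start_io_worker(int (*worker_fn)(void *), void *data, int node)
{
	struct task_struct *tsk = create_io_thread(worker_fn, data, node);

	if (IS_ERR(tsk))
		return PTR_ERR(tsk);
	wake_up_new_task(tsk);	/* required: the new task starts inactive */
	return 0;
}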
/* /*
* Ok, this is the main fork-routine. * Ok, this is the main fork-routine.
* *
......
...@@ -986,6 +986,7 @@ void __rcu_irq_enter_check_tick(void) ...@@ -986,6 +986,7 @@ void __rcu_irq_enter_check_tick(void)
} }
raw_spin_unlock_rcu_node(rdp->mynode); raw_spin_unlock_rcu_node(rdp->mynode);
} }
NOKPROBE_SYMBOL(__rcu_irq_enter_check_tick);
#endif /* CONFIG_NO_HZ_FULL */ #endif /* CONFIG_NO_HZ_FULL */
/** /**
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include <asm/tlb.h> #include <asm/tlb.h>
#include "../workqueue_internal.h" #include "../workqueue_internal.h"
#include "../../fs/io-wq.h" #include "../../io_uring/io-wq.h"
#include "../smpboot.h" #include "../smpboot.h"
#include "pelt.h" #include "pelt.h"
......
...@@ -986,7 +986,7 @@ static inline bool wants_signal(int sig, struct task_struct *p) ...@@ -986,7 +986,7 @@ static inline bool wants_signal(int sig, struct task_struct *p)
if (task_is_stopped_or_traced(p)) if (task_is_stopped_or_traced(p))
return false; return false;
return task_curr(p) || !signal_pending(p); return task_curr(p) || !task_sigpending(p);
} }
static void complete_signal(int sig, struct task_struct *p, enum pid_type type) static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
...@@ -2526,6 +2526,18 @@ bool get_signal(struct ksignal *ksig) ...@@ -2526,6 +2526,18 @@ bool get_signal(struct ksignal *ksig)
struct signal_struct *signal = current->signal; struct signal_struct *signal = current->signal;
int signr; int signr;
/*
* For non-generic architectures, check for TIF_NOTIFY_SIGNAL so
* that the arch handlers don't all have to do it. If we get here
* without TIF_SIGPENDING, just exit after running signal work.
*/
if (!IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
if (test_thread_flag(TIF_NOTIFY_SIGNAL))
tracehook_notify_signal();
if (!task_sigpending(current))
return false;
}
if (unlikely(uprobe_deny_signal())) if (unlikely(uprobe_deny_signal()))
return false; return false;
...@@ -2819,7 +2831,7 @@ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) ...@@ -2819,7 +2831,7 @@ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which)
/* Remove the signals this thread can handle. */ /* Remove the signals this thread can handle. */
sigandsets(&retarget, &retarget, &t->blocked); sigandsets(&retarget, &retarget, &t->blocked);
if (!signal_pending(t)) if (!task_sigpending(t))
signal_wake_up(t, 0); signal_wake_up(t, 0);
if (sigisemptyset(&retarget)) if (sigisemptyset(&retarget))
...@@ -2853,7 +2865,7 @@ void exit_signals(struct task_struct *tsk) ...@@ -2853,7 +2865,7 @@ void exit_signals(struct task_struct *tsk)
cgroup_threadgroup_change_end(tsk); cgroup_threadgroup_change_end(tsk);
if (!signal_pending(tsk)) if (!task_sigpending(tsk))
goto out; goto out;
unblocked = tsk->blocked; unblocked = tsk->blocked;
...@@ -2897,7 +2909,7 @@ long do_no_restart_syscall(struct restart_block *param) ...@@ -2897,7 +2909,7 @@ long do_no_restart_syscall(struct restart_block *param)
static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset)
{ {
if (signal_pending(tsk) && !thread_group_empty(tsk)) { if (task_sigpending(tsk) && !thread_group_empty(tsk)) {
sigset_t newblocked; sigset_t newblocked;
/* A set of now blocked but previously unblocked signals. */ /* A set of now blocked but previously unblocked signals. */
sigandnsets(&newblocked, newset, &current->blocked); sigandnsets(&newblocked, newset, &current->blocked);
......
...@@ -70,18 +70,17 @@ int task_work_add(struct task_struct *task, struct callback_head *work, ...@@ -70,18 +70,17 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
} }
/** /**
* task_work_cancel - cancel a pending work added by task_work_add() * task_work_cancel_match - cancel a pending work added by task_work_add()
* @task: the task which should execute the work * @task: the task which should execute the work
* @func: identifies the work to remove * @match: match function to call
*
* Find the last queued pending work with ->func == @func and remove
* it from queue.
* *
* RETURNS: * RETURNS:
* The found work or NULL if not found. * The found work or NULL if not found.
*/ */
struct callback_head * struct callback_head *
task_work_cancel(struct task_struct *task, task_work_func_t func) task_work_cancel_match(struct task_struct *task,
bool (*match)(struct callback_head *, void *data),
void *data)
{ {
struct callback_head **pprev = &task->task_works; struct callback_head **pprev = &task->task_works;
struct callback_head *work; struct callback_head *work;
...@@ -97,7 +96,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) ...@@ -97,7 +96,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
*/ */
raw_spin_lock_irqsave(&task->pi_lock, flags); raw_spin_lock_irqsave(&task->pi_lock, flags);
while ((work = READ_ONCE(*pprev))) { while ((work = READ_ONCE(*pprev))) {
if (work->func != func) if (!match(work, data))
pprev = &work->next; pprev = &work->next;
else if (cmpxchg(pprev, work, work->next) == work) else if (cmpxchg(pprev, work, work->next) == work)
break; break;
...@@ -107,6 +106,28 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) ...@@ -107,6 +106,28 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
return work; return work;
} }
static bool task_work_func_match(struct callback_head *cb, void *data)
{
return cb->func == data;
}
/**
* task_work_cancel - cancel a pending work added by task_work_add()
* @task: the task which should execute the work
* @func: identifies the work to remove
*
* Find the last queued pending work with ->func == @func and remove
* it from queue.
*
* RETURNS:
* The found work or NULL if not found.
*/
struct callback_head *
task_work_cancel(struct task_struct *task, task_work_func_t func)
{
return task_work_cancel_match(task, task_work_func_match, func);
}
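A hedged kernel-side sketch (not part of this diff) of what the match-based API enables beyond cancel-by-function, e.g. cancelling one specific queued callback by pointer identity:

static bool match_exact(struct callback_head *cb, void *data)
{
	return cb == data;	/* match this exact work item */
}

static bool cancel_exact(struct task_struct *task, struct callback_head *cb)
{
	return task_work_cancel_match(task, match_exact, cb) != NULL;
}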
/** /**
* task_work_run - execute the works added by task_work_add() * task_work_run - execute the works added by task_work_add()
* *
......
...@@ -1836,24 +1836,38 @@ int import_single_range(int rw, void __user *buf, size_t len, ...@@ -1836,24 +1836,38 @@ int import_single_range(int rw, void __user *buf, size_t len,
} }
EXPORT_SYMBOL(import_single_range); EXPORT_SYMBOL(import_single_range);
int iov_iter_for_each_range(struct iov_iter *i, size_t bytes, /**
int (*f)(struct kvec *vec, void *context), * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
void *context) * iov_iter_save_state() was called.
*
* @i: &struct iov_iter to restore
* @state: state to restore from
*
 * Used after iov_iter_save_state() to restore @i, if operations may
* have advanced it.
*
* Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC
*/
void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
{ {
struct kvec w; if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) &&
int err = -EINVAL; !iov_iter_is_kvec(i))
if (!bytes) return;
return 0; i->iov_offset = state->iov_offset;
i->count = state->count;
iterate_all_kinds(i, bytes, v, -EINVAL, ({ /*
w.iov_base = kmap(v.bv_page) + v.bv_offset; * For the *vec iters, nr_segs + iov is constant - if we increment
w.iov_len = v.bv_len; * the vec, then we also decrement the nr_segs count. Hence we don't
err = f(&w, context); * need to track both of these, just one is enough and we can deduct
kunmap(v.bv_page); * the other from that. ITER_KVEC and ITER_IOVEC are the same struct
err;}), ({ * size, so we can just increment the iov pointer as they are unionized.
w = v; * ITER_BVEC _may_ be the same size on some archs, but on others it is
err = f(&w, context);}) * not. Be safe and handle it separately.
) */
return err; BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
if (iov_iter_is_bvec(i))
i->bvec -= state->nr_segs - i->nr_segs;
else
i->iov -= state->nr_segs - i->nr_segs;
i->nr_segs = state->nr_segs;
} }
EXPORT_SYMBOL(iov_iter_for_each_range);
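A hedged kernel-side sketch (not part of this diff) of the save/restore pairing this helper exists for; iov_iter_save_state() is assumed to be added alongside iov_iter_restore(), and do_partial_io() is hypothetical, standing in for any operation that may advance the iterator.

static ssize_t io_with_retry(struct iov_iter *iter)
{
	struct iov_iter_state state;
	ssize_t ret;

	iov_iter_save_state(iter, &state);	/* snapshot before the attempt */
	ret = do_partial_io(iter);		/* hypothetical; may advance iter */
	if (ret == -EAGAIN)
		iov_iter_restore(iter, &state);	/* rewind for a blocking retry */
	return ret;
}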
...@@ -1427,6 +1427,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, ...@@ -1427,6 +1427,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
free: free:
kfree(table); kfree(table);
out: out:
mdev->sysctl = NULL;
return -ENOBUFS; return -ENOBUFS;
} }
...@@ -1436,6 +1437,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev, ...@@ -1436,6 +1437,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev,
struct net *net = dev_net(dev); struct net *net = dev_net(dev);
struct ctl_table *table; struct ctl_table *table;
if (!mdev->sysctl)
return;
table = mdev->sysctl->ctl_table_arg; table = mdev->sysctl->ctl_table_arg;
unregister_net_sysctl_table(mdev->sysctl); unregister_net_sysctl_table(mdev->sysctl);
kfree(table); kfree(table);
......
...@@ -1619,6 +1619,7 @@ static void taprio_reset(struct Qdisc *sch) ...@@ -1619,6 +1619,7 @@ static void taprio_reset(struct Qdisc *sch)
int i; int i;
hrtimer_cancel(&q->advance_timer); hrtimer_cancel(&q->advance_timer);
if (q->qdiscs) { if (q->qdiscs) {
for (i = 0; i < dev->num_tx_queues; i++) for (i = 0; i < dev->num_tx_queues; i++)
if (q->qdiscs[i]) if (q->qdiscs[i])
...@@ -1642,6 +1643,7 @@ static void taprio_destroy(struct Qdisc *sch) ...@@ -1642,6 +1643,7 @@ static void taprio_destroy(struct Qdisc *sch)
* happens in qdisc_create(), after taprio_init() has been called. * happens in qdisc_create(), after taprio_init() has been called.
*/ */
hrtimer_cancel(&q->advance_timer); hrtimer_cancel(&q->advance_timer);
qdisc_synchronize(sch);
taprio_disable_offload(dev, q, NULL); taprio_disable_offload(dev, q, NULL);
......
...@@ -1688,30 +1688,22 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog) ...@@ -1688,30 +1688,22 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
return __sys_listen(fd, backlog); return __sys_listen(fd, backlog);
} }
int __sys_accept4_file(struct file *file, unsigned file_flags, struct file *do_accept(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags, int __user *upeer_addrlen, int flags)
unsigned long nofile)
{ {
struct socket *sock, *newsock; struct socket *sock, *newsock;
struct file *newfile; struct file *newfile;
int err, len, newfd; int err, len;
struct sockaddr_storage address; struct sockaddr_storage address;
if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
return -EINVAL;
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
sock = sock_from_file(file, &err); sock = sock_from_file(file, &err);
if (!sock) if (!sock)
goto out; return ERR_PTR(err);
err = -ENFILE;
newsock = sock_alloc(); newsock = sock_alloc();
if (!newsock) if (!newsock)
goto out; return ERR_PTR(-ENFILE);
newsock->type = sock->type; newsock->type = sock->type;
newsock->ops = sock->ops; newsock->ops = sock->ops;
...@@ -1722,18 +1714,9 @@ int __sys_accept4_file(struct file *file, unsigned file_flags, ...@@ -1722,18 +1714,9 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
*/ */
__module_get(newsock->ops->owner); __module_get(newsock->ops->owner);
newfd = __get_unused_fd_flags(flags, nofile);
if (unlikely(newfd < 0)) {
err = newfd;
sock_release(newsock);
goto out;
}
newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
if (IS_ERR(newfile)) { if (IS_ERR(newfile))
err = PTR_ERR(newfile); return newfile;
put_unused_fd(newfd);
goto out;
}
err = security_socket_accept(sock, newsock); err = security_socket_accept(sock, newsock);
if (err) if (err)
...@@ -1758,16 +1741,38 @@ int __sys_accept4_file(struct file *file, unsigned file_flags, ...@@ -1758,16 +1741,38 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
} }
/* File flags are not inherited via accept() unlike another OSes. */ /* File flags are not inherited via accept() unlike another OSes. */
return newfile;
fd_install(newfd, newfile);
err = newfd;
out:
return err;
out_fd: out_fd:
fput(newfile); fput(newfile);
put_unused_fd(newfd); return ERR_PTR(err);
goto out; }
int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags,
unsigned long nofile)
{
struct file *newfile;
int newfd;
if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
return -EINVAL;
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
newfd = __get_unused_fd_flags(flags, nofile);
if (unlikely(newfd < 0))
return newfd;
newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen,
flags);
if (IS_ERR(newfile)) {
put_unused_fd(newfd);
return PTR_ERR(newfile);
}
fd_install(newfd, newfile);
return newfd;
} }
/* /*
...@@ -2181,6 +2186,17 @@ SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, ...@@ -2181,6 +2186,17 @@ SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
* Shutdown a socket. * Shutdown a socket.
*/ */
int __sys_shutdown_sock(struct socket *sock, int how)
{
int err;
err = security_socket_shutdown(sock, how);
if (!err)
err = sock->ops->shutdown(sock, how);
return err;
}
int __sys_shutdown(int fd, int how) int __sys_shutdown(int fd, int how)
{ {
int err, fput_needed; int err, fput_needed;
...@@ -2188,9 +2204,7 @@ int __sys_shutdown(int fd, int how) ...@@ -2188,9 +2204,7 @@ int __sys_shutdown(int fd, int how)
sock = sockfd_lookup_light(fd, &err, &fput_needed); sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) { if (sock != NULL) {
err = security_socket_shutdown(sock, how); err = __sys_shutdown_sock(sock, how);
if (!err)
err = sock->ops->shutdown(sock, how);
fput_light(sock->file, fput_needed); fput_light(sock->file, fput_needed);
} }
return err; return err;
......
...@@ -421,6 +421,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags, ...@@ -421,6 +421,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags, int __user *upeer_addrlen, int flags,
unsigned long nofile); unsigned long nofile);
extern struct file *do_accept(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags); int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol); extern int __sys_socket(int family, int type, int protocol);
...@@ -436,5 +439,6 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, ...@@ -436,5 +439,6 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
int __user *usockaddr_len); int __user *usockaddr_len);
extern int __sys_socketpair(int family, int type, int protocol, extern int __sys_socketpair(int family, int type, int protocol,
int __user *usockvec); int __user *usockvec);
extern int __sys_shutdown_sock(struct socket *sock, int how);
extern int __sys_shutdown(int fd, int how); extern int __sys_shutdown(int fd, int how);
#endif /* _LINUX_SOCKET_H */ #endif /* _LINUX_SOCKET_H */