Unverified commit 358a5fb9, authored by openeuler-ci-bot, committed by Gitee

!529 Backport CVEs and bugfixes

Merge Pull Request from: @zhangjialin11 
 
Pull new CVEs:
CVE-2023-1281
CVE-2022-48423
CVE-2023-1249
CVE-2022-48425
CVE-2022-48424
CVE-2023-28327
CVE-2023-28466
CVE-2023-1380

block and md/raid6 bugfixes from Zhong Jinghua
fs bugfixes from Zhihao Cheng and Baokun Li
tty bugfix from Yi Yang
mm bugfixes from ZhangPeng and Ze Zuo
bpf bugfixes from Pu Lehui and Liu Jian
ima bugfix from GUO Zihua
softirq and arch bugfixes from Lin Yujun 
 
Link: https://gitee.com/openeuler/kernel/pulls/529

Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com> 
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> 
@@ -76,10 +76,14 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A57      | #1319537        | ARM64_ERRATUM_1319367       |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A57      | #1742098        | ARM64_ERRATUM_1742098       |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A72      | #853709         | N/A                         |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A72      | #1319367        | ARM64_ERRATUM_1319367       |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A72      | #1655431        | ARM64_ERRATUM_1742098       |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A76      | #1188873,1418040| ARM64_ERRATUM_1418040       |
......
@@ -1022,6 +1022,17 @@ config ARM_ERRATA_764369
relevant cache maintenance functions and sets a specific bit
in the diagnostic control register of the SCU.
config ARM_ERRATA_764319
bool "ARM errata: Read to DBGPRSR and DBGOSLSR may generate Undefined instruction"
depends on CPU_V7
help
This option enables the workaround for the 764319 Cortex A-9 erratum.
CP14 read accesses to the DBGPRSR and DBGOSLSR registers generate an
unexpected Undefined Instruction exception when the DBGSWENABLE
external pin is set to 0, even when the CP14 accesses are performed
from a privileged mode. This work around catches the exception in a
way the kernel does not stop execution.
config ARM_ERRATA_775420
bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock"
depends on CPU_V7
......
@@ -941,6 +941,23 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
return ret;
}
#ifdef CONFIG_ARM_ERRATA_764319
static int oslsr_fault;
static int debug_oslsr_trap(struct pt_regs *regs, unsigned int instr)
{
oslsr_fault = 1;
instruction_pointer(regs) += 4;
return 0;
}
static struct undef_hook debug_oslsr_hook = {
.instr_mask = 0xffffffff,
.instr_val = 0xee115e91,
.fn = debug_oslsr_trap,
};
#endif
/*
* One-time initialisation.
*/
@@ -974,7 +991,16 @@ static bool core_has_os_save_restore(void)
case ARM_DEBUG_ARCH_V7_1:
return true;
case ARM_DEBUG_ARCH_V7_ECP14:
#ifdef CONFIG_ARM_ERRATA_764319
oslsr_fault = 0;
register_undef_hook(&debug_oslsr_hook);
ARM_DBG_READ(c1, c1, 4, oslsr);
unregister_undef_hook(&debug_oslsr_hook);
if (oslsr_fault)
return false;
#else
ARM_DBG_READ(c1, c1, 4, oslsr);
#endif
if (oslsr & ARM_OSLSR_OSLM0)
return true;
fallthrough;
......
@@ -492,6 +492,22 @@ config ARM64_ERRATUM_834220
If unsure, say Y.
config ARM64_ERRATUM_1742098
bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence"
depends on COMPAT
default y
help
This option removes the AES hwcap for aarch32 user-space to
workaround erratum 1742098 on Cortex-A57 and Cortex-A72.
Affected parts may corrupt the AES state if an interrupt is
taken between a pair of AES instructions. These instructions
are only present if the cryptography extensions are present.
All software should have a fallback implementation for CPUs
that don't implement the cryptography extensions.
If unsure, say Y.
config ARM64_ERRATUM_845719
bool "Cortex-A53: 845719: a load might read incorrect data"
depends on AARCH32_EL0
......
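Note on the ARM64_ERRATUM_1742098 workaround above: it only stops advertising the AArch32 AES hwcap, while the CPU still implements the instructions. A minimal user-space sketch of the fallback check the help text asks for, assuming a 32-bit ARM build with glibc's getauxval() (the HWCAP2_AES fallback define mirrors the ARM UAPI value and is only needed if the libc headers lack it):

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP2_AES
#define HWCAP2_AES (1 << 0)    /* value from arch/arm/include/uapi/asm/hwcap.h */
#endif

int main(void)
{
    if (getauxval(AT_HWCAP2) & HWCAP2_AES)
        puts("AES hwcap advertised: hardware AES path is safe to use");
    else
        puts("AES hwcap absent (no crypto extensions, or erratum 1742098 workaround): use the software fallback");
    return 0;
}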
@@ -72,6 +72,7 @@
#define ARM64_HAS_ECV 64
#define ARM64_HAS_EPAN 65
#define ARM64_SPECTRE_BHB 66
#define ARM64_WORKAROUND_1742098 67
#define ARM64_NCAPS 80
......
@@ -427,6 +427,14 @@ static const struct midr_range erratum_1463225[] = {
};
#endif
#ifdef CONFIG_ARM64_ERRATUM_1742098
static struct midr_range broken_aarch32_aes[] = {
MIDR_RANGE(MIDR_CORTEX_A57, 0, 1, 0xf, 0xf),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
{},
};
#endif
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -626,6 +634,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
1, 0),
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_1742098
{
.desc = "ARM erratum 1742098",
.capability = ARM64_WORKAROUND_1742098,
CAP_MIDR_RANGE_LIST(broken_aarch32_aes),
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
},
#endif
#ifdef CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH
{
.desc = "HiSilicon HIP08 Cache Readunique Prefetch Disable",
......
@@ -76,6 +76,7 @@
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/fpsimd.h>
#include <asm/hwcap.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/processor.h>
@@ -1771,6 +1772,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
}
#endif /* CONFIG_ARM64_MTE */
static void elf_hwcap_fixup(void)
{
#ifdef CONFIG_ARM64_ERRATUM_1742098
if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
a32_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
#endif /* ARM64_ERRATUM_1742098 */
}
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2837,8 +2846,10 @@ void __init setup_cpu_features(void)
setup_system_capabilities();
setup_elf_hwcaps(arm64_elf_hwcaps);
-if (system_supports_32bit_el0())
+if (system_supports_32bit_el0()) {
setup_elf_hwcaps(a32_elf_hwcaps);
elf_hwcap_fixup();
}
if (system_uses_ttbr0_pan())
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
......
@@ -400,8 +400,7 @@ void blk_cleanup_queue(struct request_queue *q)
* prevent that blk_mq_run_hw_queues() accesses the hardware queues
* after draining finished.
*/
-blk_freeze_queue_start(q);
-blk_mq_freeze_queue_wait_sync(q);
+blk_freeze_queue(q);
rq_qos_exit(q);
@@ -432,8 +431,6 @@ void blk_cleanup_queue(struct request_queue *q)
blk_mq_sched_free_rqs(q);
mutex_unlock(&q->sysfs_lock);
-percpu_ref_exit(&q->q_usage_counter);
/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
@@ -518,7 +515,6 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
struct request_queue *q =
container_of(ref, struct request_queue, q_usage_counter);
-blk_queue_flag_set(QUEUE_FLAG_USAGE_COUNT_SYNC, q);
wake_up_all(&q->mq_freeze_wq);
}
......
@@ -195,7 +195,6 @@ void blk_freeze_queue_start(struct request_queue *q)
{
mutex_lock(&q->mq_freeze_lock);
if (++q->mq_freeze_depth == 1) {
-blk_queue_flag_clear(QUEUE_FLAG_USAGE_COUNT_SYNC, q);
percpu_ref_kill(&q->q_usage_counter);
mutex_unlock(&q->mq_freeze_lock);
if (queue_is_mq(q))
@@ -206,12 +205,6 @@ void blk_freeze_queue_start(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
-void blk_mq_freeze_queue_wait_sync(struct request_queue *q)
-{
-wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter) &&
-test_bit(QUEUE_FLAG_USAGE_COUNT_SYNC, &q->queue_flags));
-}
void blk_mq_freeze_queue_wait(struct request_queue *q)
{
wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
......
@@ -726,6 +726,8 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
{
struct request_queue *q = container_of(rcu_head, struct request_queue,
rcu_head);
percpu_ref_exit(&q->q_usage_counter);
kmem_cache_free(blk_requestq_cachep, q);
}
......
@@ -5429,7 +5429,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
-align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_acct_set);
md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
raid_bio->bi_next = (void *)rdev;
......
@@ -5831,6 +5831,11 @@ static s32 brcmf_get_assoc_ies(struct brcmf_cfg80211_info *cfg,
(struct brcmf_cfg80211_assoc_ielen_le *)cfg->extra_buf;
req_len = le32_to_cpu(assoc_info->req_len);
resp_len = le32_to_cpu(assoc_info->resp_len);
if (req_len > WL_EXTRA_BUF_MAX || resp_len > WL_EXTRA_BUF_MAX) {
bphy_err(drvr, "invalid lengths in assoc info: req %u resp %u\n",
req_len, resp_len);
return -EINVAL;
}
if (req_len) {
err = brcmf_fil_iovar_data_get(ifp, "assoc_req_ies",
cfg->extra_buf,
......
@@ -1237,14 +1237,16 @@ static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver,
{
struct tty_struct *tty;
-if (driver->ops->lookup)
+if (driver->ops->lookup) {
if (!file)
tty = ERR_PTR(-EIO);
else
tty = driver->ops->lookup(driver, file, idx);
-else
+} else {
if (idx >= driver->num)
return ERR_PTR(-EINVAL);
tty = driver->ttys[idx];
}
if (!IS_ERR(tty))
tty_kref_get(tty);
return tty;
......
@@ -1613,17 +1613,16 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
* long file_ofs
* followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
*/
-static int fill_files_note(struct memelfnote *note)
+static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
{
-struct mm_struct *mm = current->mm;
-struct vm_area_struct *vma;
unsigned count, size, names_ofs, remaining, n;
user_long_t *data;
user_long_t *start_end_ofs;
char *name_base, *name_curpos;
int i;
/* *Estimated* file count and total data size needed */
-count = mm->map_count;
+count = cprm->vma_count;
if (count > UINT_MAX / 64)
return -EINVAL;
size = count * 64;
@@ -1645,11 +1644,12 @@ static int fill_files_note(struct memelfnote *note)
name_base = name_curpos = ((char *)data) + names_ofs;
remaining = size - names_ofs;
count = 0;
-for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+for (i = 0; i < cprm->vma_count; i++) {
struct core_vma_metadata *m = &cprm->vma_meta[i];
struct file *file;
const char *filename;
-file = vma->vm_file;
+file = m->file;
if (!file)
continue;
filename = file_path(file, name_curpos, remaining);
@@ -1669,9 +1669,9 @@ static int fill_files_note(struct memelfnote *note)
memmove(name_curpos, filename, n);
name_curpos += n;
-*start_end_ofs++ = vma->vm_start;
-*start_end_ofs++ = vma->vm_end;
-*start_end_ofs++ = vma->vm_pgoff;
+*start_end_ofs++ = m->start;
+*start_end_ofs++ = m->end;
+*start_end_ofs++ = m->pgoff;
count++;
}
@@ -1682,7 +1682,7 @@ static int fill_files_note(struct memelfnote *note)
* Count usually is less than mm->map_count,
* we need to move filenames down.
*/
-n = mm->map_count - count;
+n = cprm->vma_count - count;
if (n != 0) {
unsigned shift_bytes = n * 3 * sizeof(data[0]);
memmove(name_base - shift_bytes, name_base,
@@ -1797,7 +1797,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
-const kernel_siginfo_t *siginfo, struct pt_regs *regs)
+struct coredump_params *cprm)
{
struct task_struct *dump_task = current;
const struct user_regset_view *view = task_user_regset_view(dump_task);
@@ -1869,7 +1869,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
* Now fill in each thread's information.
*/
for (t = info->thread; t != NULL; t = t->next)
-if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
+if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, &info->size))
return 0;
/*
@@ -1878,13 +1878,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
info->size += notesize(&info->psinfo);
-fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
+fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
info->size += notesize(&info->signote);
fill_auxv_note(&info->auxv, current->mm);
info->size += notesize(&info->auxv);
-if (fill_files_note(&info->files) == 0)
+if (fill_files_note(&info->files, cprm) == 0)
info->size += notesize(&info->files);
return 1;
@@ -2026,7 +2026,7 @@ static int elf_note_info_init(struct elf_note_info *info)
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
-const kernel_siginfo_t *siginfo, struct pt_regs *regs)
+struct coredump_params *cprm)
{
struct core_thread *ct;
struct elf_thread_status *ets;
@@ -2047,13 +2047,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
list_for_each_entry(ets, &info->thread_list, list) {
int sz;
-sz = elf_dump_thread_status(siginfo->si_signo, ets);
+sz = elf_dump_thread_status(cprm->siginfo->si_signo, ets);
info->thread_status_size += sz;
}
/* now collect the dump for the current */
memset(info->prstatus, 0, sizeof(*info->prstatus));
-fill_prstatus(info->prstatus, current, siginfo->si_signo);
-elf_core_copy_regs(&info->prstatus->pr_reg, regs);
+fill_prstatus(info->prstatus, current, cprm->siginfo->si_signo);
+elf_core_copy_regs(&info->prstatus->pr_reg, cprm->regs);
/* Set up header */
fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
@@ -2069,18 +2069,18 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
sizeof(*info->psinfo), info->psinfo);
-fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
+fill_siginfo_note(info->notes + 2, &info->csigdata, cprm->siginfo);
fill_auxv_note(info->notes + 3, current->mm);
info->numnote = 4;
-if (fill_files_note(info->notes + info->numnote) == 0) {
+if (fill_files_note(info->notes + info->numnote, cprm) == 0) {
info->notes_files = info->notes + info->numnote;
info->numnote++;
}
/* Try to dump the FPU. */
-info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
-info->fpu);
+info->prstatus->pr_fpvalid =
+elf_core_copy_task_fpregs(current, cprm->regs, info->fpu);
if (info->prstatus->pr_fpvalid)
fill_note(info->notes + info->numnote++,
"CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
@@ -2166,8 +2166,7 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
static int elf_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
-int vma_count, segs, i;
-size_t vma_data_size;
+int segs, i;
struct elfhdr elf;
loff_t offset = 0, dataoff;
struct elf_note_info info = { };
@@ -2175,16 +2174,12 @@ static int elf_core_dump(struct coredump_params *cprm)
struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum;
elf_addr_t e_shoff;
-struct core_vma_metadata *vma_meta;
-if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
-return 0;
/*
* The number of segs are recored into ELF header as 16bit value.
* Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
*/
-segs = vma_count + elf_core_extra_phdrs();
+segs = cprm->vma_count + elf_core_extra_phdrs();
/* for notes section */
segs++;
@@ -2198,7 +2193,7 @@ static int elf_core_dump(struct coredump_params *cprm)
* Collect all the non-memory information about the process for the
* notes. This also sets up the file header.
*/
-if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
+if (!fill_note_info(&elf, e_phnum, &info, cprm))
goto end_coredump;
has_dumped = 1;
@@ -2222,7 +2217,7 @@ static int elf_core_dump(struct coredump_params *cprm)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
-offset += vma_data_size;
+offset += cprm->vma_data_size;
offset += elf_core_extra_data_size();
e_shoff = offset;
@@ -2242,8 +2237,8 @@ static int elf_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* Write program headers for segments dump */
-for (i = 0; i < vma_count; i++) {
-struct core_vma_metadata *meta = vma_meta + i;
+for (i = 0; i < cprm->vma_count; i++) {
+struct core_vma_metadata *meta = cprm->vma_meta + i;
struct elf_phdr phdr;
phdr.p_type = PT_LOAD;
@@ -2280,8 +2275,8 @@ static int elf_core_dump(struct coredump_params *cprm)
if (!dump_skip(cprm, dataoff - cprm->pos))
goto end_coredump;
-for (i = 0; i < vma_count; i++) {
-struct core_vma_metadata *meta = vma_meta + i;
+for (i = 0; i < cprm->vma_count; i++) {
+struct core_vma_metadata *meta = cprm->vma_meta + i;
if (!dump_user_range(cprm, meta->start, meta->dump_size))
goto end_coredump;
@@ -2299,7 +2294,6 @@ static int elf_core_dump(struct coredump_params *cprm)
end_coredump:
free_note_info(&info);
kfree(shdr4extnum);
-kvfree(vma_meta);
kfree(phdr4note);
return has_dumped;
}
......
@@ -1479,7 +1479,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm,
static int elf_fdpic_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
-int vma_count, segs;
+int segs;
int i;
struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff;
@@ -1494,8 +1494,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
elf_addr_t e_shoff;
struct core_thread *ct;
struct elf_thread_status *tmp;
-struct core_vma_metadata *vma_meta = NULL;
-size_t vma_data_size;
/* alloc memory for large data structures: too large to be on stack */
elf = kmalloc(sizeof(*elf), GFP_KERNEL);
@@ -1505,9 +1503,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
if (!psinfo)
goto end_coredump;
-if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
-goto end_coredump;
for (ct = current->mm->core_state->dumper.next;
ct; ct = ct->next) {
tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
@@ -1527,7 +1522,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
tmp->next = thread_list;
thread_list = tmp;
-segs = vma_count + elf_core_extra_phdrs();
+segs = cprm->vma_count + elf_core_extra_phdrs();
/* for notes section */
segs++;
@@ -1572,7 +1567,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
/* Page-align dumped data */
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
-offset += vma_data_size;
+offset += cprm->vma_data_size;
offset += elf_core_extra_data_size();
e_shoff = offset;
@@ -1592,8 +1587,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* write program headers for segments dump */
-for (i = 0; i < vma_count; i++) {
-struct core_vma_metadata *meta = vma_meta + i;
+for (i = 0; i < cprm->vma_count; i++) {
+struct core_vma_metadata *meta = cprm->vma_meta + i;
struct elf_phdr phdr;
size_t sz;
@@ -1643,7 +1638,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
if (!dump_skip(cprm, dataoff - cprm->pos))
goto end_coredump;
-if (!elf_fdpic_dump_segments(cprm, vma_meta, vma_count))
+if (!elf_fdpic_dump_segments(cprm, cprm->vma_meta, cprm->vma_count))
goto end_coredump;
if (!elf_core_write_extra_data(cprm))
@@ -1667,7 +1662,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
thread_list = thread_list->next;
kfree(tmp);
}
-kvfree(vma_meta);
kfree(phdr4note);
kfree(elf);
kfree(psinfo);
......
@@ -53,6 +53,9 @@
#include <trace/events/sched.h>
static bool dump_vma_snapshot(struct coredump_params *cprm);
static void free_vma_snapshot(struct coredump_params *cprm);
int core_uses_pid;
unsigned int core_pipe_limit;
char core_pattern[CORENAME_MAX_SIZE] = "core";
@@ -602,6 +605,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
* by any locks.
*/
.mm_flags = mm->flags,
.vma_meta = NULL,
};
audit_core_dumps(siginfo->si_signo);
@@ -807,9 +811,13 @@ void do_coredump(const kernel_siginfo_t *siginfo)
pr_info("Core dump to |%s disabled\n", cn.corename);
goto close_fail;
}
if (!dump_vma_snapshot(&cprm))
goto close_fail;
file_start_write(cprm.file);
core_dumped = binfmt->core_dump(&cprm);
file_end_write(cprm.file);
free_vma_snapshot(&cprm);
}
if (ispipe && core_pipe_limit)
wait_for_dump_helpers(cprm.file);
@@ -1081,18 +1089,29 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
return gate_vma;
}
static void free_vma_snapshot(struct coredump_params *cprm)
{
if (cprm->vma_meta) {
int i;
for (i = 0; i < cprm->vma_count; i++) {
struct file *file = cprm->vma_meta[i].file;
if (file)
fput(file);
}
kvfree(cprm->vma_meta);
cprm->vma_meta = NULL;
}
}
/*
* Under the mmap_lock, take a snapshot of relevant information about the task's
* VMAs.
*/
-int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
-struct core_vma_metadata **vma_meta,
-size_t *vma_data_size_ptr)
+static bool dump_vma_snapshot(struct coredump_params *cprm)
{
struct vm_area_struct *vma, *gate_vma;
struct mm_struct *mm = current->mm;
int i;
-size_t vma_data_size = 0;
/*
* Once the stack expansion code is fixed to not change VMA bounds
@@ -1100,36 +1119,37 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
* mmap_lock in read mode.
*/
if (mmap_write_lock_killable(mm))
-return -EINTR;
+return false;
cprm->vma_data_size = 0;
gate_vma = get_gate_vma(mm);
-*vma_count = mm->map_count + (gate_vma ? 1 : 0);
-*vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL);
-if (!*vma_meta) {
+cprm->vma_count = mm->map_count + (gate_vma ? 1 : 0);
+cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL);
+if (!cprm->vma_meta) {
mmap_write_unlock(mm);
-return -ENOMEM;
+return false;
}
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
vma = next_vma(vma, gate_vma), i++) {
-struct core_vma_metadata *m = (*vma_meta) + i;
+struct core_vma_metadata *m = cprm->vma_meta + i;
m->start = vma->vm_start;
m->end = vma->vm_end;
m->flags = vma->vm_flags;
m->dump_size = vma_dump_size(vma, cprm->mm_flags);
m->pgoff = vma->vm_pgoff;
m->file = vma->vm_file;
if (m->file)
get_file(m->file);
}
mmap_write_unlock(mm);
-if (WARN_ON(i != *vma_count)) {
-kvfree(*vma_meta);
-return -EFAULT;
-}
-for (i = 0; i < *vma_count; i++) {
-struct core_vma_metadata *m = (*vma_meta) + i;
+for (i = 0; i < cprm->vma_count; i++) {
+struct core_vma_metadata *m = cprm->vma_meta + i;
if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) {
char elfmag[SELFMAG];
@@ -1142,9 +1162,8 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
}
}
-vma_data_size += m->dump_size;
+cprm->vma_data_size += m->dump_size;
}
-*vma_data_size_ptr = vma_data_size;
-return 0;
+return true;
}
@@ -5539,6 +5539,7 @@ static int ext4_load_journal(struct super_block *sb,
err = jbd2_journal_wipe(journal, !really_read_only);
if (!err) {
char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
if (save)
memcpy(save, ((char *) es) +
EXT4_S_ERR_START, EXT4_S_ERR_LEN);
@@ -5547,6 +5548,14 @@ static int ext4_load_journal(struct super_block *sb,
memcpy(((char *) es) + EXT4_S_ERR_START,
save, EXT4_S_ERR_LEN);
kfree(save);
es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
EXT4_ERROR_FS);
/* Write out restored error information to the superblock */
if (!bdev_read_only(sb->s_bdev)) {
int err2;
err2 = ext4_commit_super(sb);
err = err ? : err2;
}
}
if (err) {
@@ -5769,11 +5778,13 @@ static int ext4_clear_journal_err(struct super_block *sb,
errstr = ext4_decode_error(sb, j_errno, nbuf);
ext4_warning(sb, "Filesystem error recorded "
"from previous mount: %s", errstr);
-ext4_warning(sb, "Marking fs in need of filesystem check.");
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-ext4_commit_super(sb);
+j_errno = ext4_commit_super(sb);
if (j_errno)
return j_errno;
ext4_warning(sb, "Marked fs in need of filesystem check.");
jbd2_journal_clear_err(journal);
jbd2_journal_update_sb_errno(journal);
......
@@ -21,6 +21,8 @@
#include "../internal.h"
#define IOEND_BATCH_SIZE 4096
/*
* Structure allocated for each page or THP when block size < page size
* to track sub-page uptodate status and I/O completions.
@@ -1061,7 +1063,7 @@ iomap_finish_page_writeback(struct inode *inode, struct page *page,
* state, release holds on bios, and finally free up memory. Do not use the
* ioend after this.
*/
-static void
+static u32
iomap_finish_ioend(struct iomap_ioend *ioend, int error)
{
struct inode *inode = ioend->io_inode;
@@ -1070,6 +1072,7 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
u64 start = bio->bi_iter.bi_sector;
loff_t offset = ioend->io_offset;
bool quiet = bio_flagged(bio, BIO_QUIET);
u32 folio_count = 0;
for (bio = &ioend->io_inline_bio; bio; bio = next) {
struct bio_vec *bv;
@@ -1085,9 +1088,11 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
next = bio->bi_private;
/* walk each page on bio, ending page IO on them */
-bio_for_each_segment_all(bv, bio, iter_all)
+bio_for_each_segment_all(bv, bio, iter_all) {
iomap_finish_page_writeback(inode, bv->bv_page, error,
bv->bv_len);
folio_count++;
}
bio_put(bio);
}
/* The ioend has been freed by bio_put() */
@@ -1097,20 +1102,36 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
"%s: writeback error on inode %lu, offset %lld, sector %llu",
inode->i_sb->s_id, inode->i_ino, offset, start);
}
return folio_count;
}
/*
* Ioend completion routine for merged bios. This can only be called from task
* contexts as merged ioends can be of unbound length. Hence we have to break up
* the writeback completions into manageable chunks to avoid long scheduler
* holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get
* good batch processing throughput without creating adverse scheduler latency
* conditions.
*/
void
iomap_finish_ioends(struct iomap_ioend *ioend, int error)
{
struct list_head tmp;
u32 completions;
might_sleep();
list_replace_init(&ioend->io_list, &tmp);
-iomap_finish_ioend(ioend, error);
+completions = iomap_finish_ioend(ioend, error);
while (!list_empty(&tmp)) {
if (completions > IOEND_BATCH_SIZE * 8) {
cond_resched();
completions = 0;
}
ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
list_del_init(&ioend->io_list);
-iomap_finish_ioend(ioend, error);
+completions += iomap_finish_ioend(ioend, error);
}
}
EXPORT_SYMBOL_GPL(iomap_finish_ioends);
@@ -1131,6 +1152,18 @@ iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
return false;
if (ioend->io_offset + ioend->io_size != next->io_offset)
return false;
/*
* Do not merge physically discontiguous ioends. The filesystem
* completion functions will have to iterate the physical
* discontiguities even if we merge the ioends at a logical level, so
* we don't gain anything by merging physical discontiguities here.
*
* We cannot use bio->bi_iter.bi_sector here as it is modified during
* submission so does not point to the start sector of the bio at
* completion.
*/
if (ioend->io_sector + (ioend->io_size >> 9) != next->io_sector)
return false;
return true;
}
@@ -1236,9 +1269,11 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
ioend->io_flags = wpc->iomap.flags;
ioend->io_inode = inode;
ioend->io_size = 0;
ioend->io_folios = 0;
ioend->io_offset = offset;
ioend->io_private = NULL;
ioend->io_bio = bio;
ioend->io_sector = sector;
return ioend;
}
@@ -1279,6 +1314,13 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
return false;
if (sector != bio_end_sector(wpc->ioend->io_bio))
return false;
/*
* Limit ioend bio chain lengths to minimise IO completion latency. This
* also prevents long tight loops ending page writeback on all the
* folios in the ioend.
*/
if (wpc->ioend->io_folios >= IOEND_BATCH_SIZE)
return false;
return true;
}
@@ -1372,6 +1414,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
&submit_list);
count++;
}
if (count)
wpc->ioend->io_folios++;
WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
WARN_ON_ONCE(!PageLocked(page));
......
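The iomap comments above (bounded ioend bio chains, batched completion with cond_resched()) describe a general pattern: cap how much completion work runs back-to-back so an unbounded list cannot stall the scheduler. A rough, self-contained sketch of that pattern follows; the work_item type, finish_one() helper and COMPLETION_BATCH limit are made up for illustration and are not code from this patch:

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/types.h>

#define COMPLETION_BATCH (4096 * 8) /* in the spirit of IOEND_BATCH_SIZE * 8 */

struct work_item {
    struct list_head list;
};

/* Stand-in for the per-item completion work; pretend each item covers one page. */
static u32 finish_one(struct work_item *item)
{
    list_del_init(&item->list);
    return 1;
}

/* Drain an unbounded completion list without hogging the CPU. */
static void finish_all(struct list_head *head)
{
    u32 completed = 0;

    while (!list_empty(head)) {
        struct work_item *item;

        if (completed > COMPLETION_BATCH) {
            cond_resched();  /* bound the scheduler holdoff */
            completed = 0;
        }
        item = list_first_entry(head, struct work_item, list);
        completed += finish_one(item);
    }
}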
@@ -98,6 +98,12 @@ static struct inode *ntfs_read_mft(struct inode *inode,
/* Record should contain $I30 root. */
is_dir = rec->flags & RECORD_FLAG_DIR;
/* MFT_REC_MFT is not a dir */
if (is_dir && ino == MFT_REC_MFT) {
err = -EINVAL;
goto out;
}
inode->i_generation = le16_to_cpu(rec->seq);
/* Enumerate all struct Attributes MFT. */
@@ -129,6 +135,9 @@ static struct inode *ntfs_read_mft(struct inode *inode,
rsize = attr->non_res ? 0 : le32_to_cpu(attr->res.data_size);
asize = le32_to_cpu(attr->size);
if (le16_to_cpu(attr->name_off) + attr->name_len > asize)
goto out;
switch (attr->type) {
case ATTR_STD:
if (attr->non_res ||
......
@@ -260,6 +260,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
if (t16 + t32 > asize)
return NULL;
if (attr->name_len &&
le16_to_cpu(attr->name_off) + sizeof(short) * attr->name_len > t16) {
return NULL;
}
return attr;
}
......
@@ -191,7 +191,20 @@ xfs_ioend_merge_private(
}
}
-/* Finish all pending io completions. */
+/*
* Finish all pending IO completions that require transactional modifications.
*
* We try to merge physical and logically contiguous ioends before completion to
* minimise the number of transactions we need to perform during IO completion.
* Both unwritten extent conversion and COW remapping need to iterate and modify
* one physical extent at a time, so we gain nothing by merging physically
* discontiguous extents here.
*
* The ioend chain length that we can be processing here is largely unbound in
* length and we may have to perform significant amounts of work on each ioend
* to complete it. Hence we have to be careful about holding the CPU for too
* long in this loop.
*/
void
xfs_end_io(
struct work_struct *work)
@@ -212,6 +225,7 @@ xfs_end_io(
list_del_init(&ioend->io_list);
iomap_ioend_try_merge(ioend, &tmp, xfs_ioend_merge_private);
xfs_end_ioend(ioend);
cond_resched();
}
}
......
@@ -82,6 +82,9 @@ struct coredump_params {
unsigned long mm_flags;
loff_t written;
loff_t pos;
KABI_EXTEND(int vma_count)
KABI_EXTEND(size_t vma_data_size)
KABI_EXTEND(struct core_vma_metadata *vma_meta)
};
/*
......
@@ -566,7 +566,6 @@ void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_unfreeze_queue(struct request_queue *q);
void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
-void blk_mq_freeze_queue_wait_sync(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
......
@@ -643,8 +643,6 @@ struct request_queue {
#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
/*at least one blk-mq hctx can't get driver tag */
#define QUEUE_FLAG_HCTX_WAIT 30
-/* sync for q_usage_counter */
-#define QUEUE_FLAG_USAGE_COUNT_SYNC 31
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
......
@@ -11,6 +11,8 @@ struct core_vma_metadata {
unsigned long start, end;
unsigned long flags;
unsigned long dump_size;
unsigned long pgoff;
struct file *file;
};
/*
@@ -24,9 +26,6 @@ extern int dump_align(struct coredump_params *cprm, int align);
extern void dump_truncate(struct coredump_params *cprm);
int dump_user_range(struct coredump_params *cprm, unsigned long start,
unsigned long len);
-int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
-struct core_vma_metadata **vma_meta,
-size_t *vma_data_size_ptr);
#ifdef CONFIG_COREDUMP
extern void do_coredump(const kernel_siginfo_t *siginfo);
#else
......
@@ -578,12 +578,13 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
struct bpf_prog_stats *__stats; \
u64 __start = sched_clock(); \
unsigned long flags; \
__ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
__stats = this_cpu_ptr(prog->aux->stats); \
-u64_stats_update_begin(&__stats->syncp); \
+flags = u64_stats_update_begin_irqsave(&__stats->syncp);\
__stats->cnt++; \
__stats->nsecs += sched_clock() - __start; \
-u64_stats_update_end(&__stats->syncp); \
+u64_stats_update_end_irqrestore(&__stats->syncp, flags);\
} else { \
__ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
} \
......
@@ -199,10 +199,12 @@ struct iomap_ioend {
struct list_head io_list; /* next ioend in chain */
u16 io_type;
u16 io_flags; /* IOMAP_F_* */
u32 io_folios; /* folios added to ioend */
struct inode *io_inode; /* file being written to */
size_t io_size; /* size of the extent */
loff_t io_offset; /* offset in the file */
void *io_private; /* file system private data */
sector_t io_sector; /* start sector of ioend */
struct bio *io_bio; /* bio being built */
struct bio io_inline_bio; /* MUST BE LAST! */
};
......
@@ -3675,6 +3675,11 @@ static int btf_func_proto_check(struct btf_verifier_env *env,
break;
}
if (btf_type_is_resolve_source_only(arg_type)) {
btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
return -EINVAL;
}
if (args[i].name_off &&
(!btf_name_offset_valid(btf, args[i].name_off) ||
!btf_name_valid_identifier(btf, args[i].name_off))) {
......
@@ -526,11 +526,13 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
* Hence check that 'start' is not zero.
*/
start) {
unsigned long flags;
stats = this_cpu_ptr(prog->aux->stats);
-u64_stats_update_begin(&stats->syncp);
+flags = u64_stats_update_begin_irqsave(&stats->syncp);
stats->cnt++;
stats->nsecs += sched_clock() - start;
-u64_stats_update_end(&stats->syncp);
+u64_stats_update_end_irqrestore(&stats->syncp, flags);
}
migrate_enable();
rcu_read_unlock();
......
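For context on the two u64_stats changes above (filter.h and trampoline.c): the per-CPU program stats can also be updated from a BPF program triggered in interrupt context, so on 32-bit architectures a plain seqcount writer section could deadlock; the irqsave variants close that window. A hedged sketch of the matching read side, modelled on how the kernel sums these per-CPU counters elsewhere (the helper name here is made up):

#include <linux/filter.h>
#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

static void bpf_prog_sum_stats(const struct bpf_prog *prog, u64 *cnt, u64 *nsecs)
{
    int cpu;

    *cnt = 0;
    *nsecs = 0;
    for_each_possible_cpu(cpu) {
        const struct bpf_prog_stats *st = per_cpu_ptr(prog->aux->stats, cpu);
        unsigned int start;
        u64 c, n;

        /* Retry if a writer (possibly in IRQ context) was mid-update. */
        do {
            start = u64_stats_fetch_begin_irq(&st->syncp);
            c = st->cnt;
            n = st->nsecs;
        } while (u64_stats_fetch_retry_irq(&st->syncp, start));
        *cnt += c;
        *nsecs += n;
    }
}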
@@ -144,7 +144,7 @@ static ssize_t write_irq_affinity(int type, struct file *file,
if (!irq_can_set_affinity_usr(irq) || no_irq_affinity)
return -EIO;
-if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
if (type)
@@ -238,7 +238,7 @@ static ssize_t default_affinity_write(struct file *file,
cpumask_var_t new_value;
int err;
-if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(buffer, count, new_value);
......
@@ -438,7 +438,7 @@ static ssize_t prof_cpu_mask_proc_write(struct file *file,
cpumask_var_t new_value;
int err;
-if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(buffer, count, new_value);
......
@@ -376,7 +376,7 @@ static inline void invoke_softirq(void)
if (ksoftirqd_running(local_softirq_pending()))
return;
-if (!force_irqthreads) {
+if (!force_irqthreads || !__this_cpu_read(ksoftirqd)) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
/*
* We can safely execute softirq on the current stack if
......
@@ -1072,6 +1072,9 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
return -EPERM;
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;
/* Task should not be pid=1 to avoid kernel panic. */
if (unlikely(is_global_init(current)))
return -EPERM;
if (irqs_disabled()) {
/* Do an early check on signal validity. Otherwise,
......
@@ -4792,7 +4792,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
cpumask_var_t tracing_cpumask_new;
int err;
-if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
+if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
......
@@ -1926,7 +1926,9 @@ static inline bool is_via_compact_memory(int order)
static bool kswapd_is_running(pg_data_t *pgdat)
{
-return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING);
+struct task_struct *t = READ_ONCE(pgdat->kswapd);
return t && (t->state == TASK_RUNNING);
}
/*
@@ -2947,7 +2949,8 @@ static int kcompactd_cpu_online(unsigned int cpu)
if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
/* One of our CPUs online: restore mask */
-set_cpus_allowed_ptr(pgdat->kcompactd, mask);
+if (pgdat->kcompactd)
set_cpus_allowed_ptr(pgdat->kcompactd, mask);
}
return 0;
}
......
@@ -1483,8 +1483,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
node_states_clear_node(node, &arg);
if (arg.status_change_nid >= 0) {
-kswapd_stop(node);
kcompactd_stop(node);
kswapd_stop(node);
} }
writeback_set_ratelimit(); writeback_set_ratelimit();
......
@@ -1104,6 +1104,7 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
 			goto check_out;
 		pr_debug("scan_swap_map of si %d failed to find offset\n",
 			si->type);
+		cond_resched();
 
 		spin_lock(&swap_avail_lock);
 nextsi:
...
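The cond_resched() added here lets other tasks run while get_swap_pages() keeps retrying the swap-device scan under heavy pressure, avoiding soft lockups in a long retry loop. The same idea in a runnable userspace sketch, with sched_yield() as a rough stand-in for cond_resched() and try_scan() invented for illustration:

#include <sched.h>
#include <stdio.h>

/* Hypothetical scan that only succeeds on the last attempt. */
static int try_scan(int attempt, int max)
{
        return attempt == max - 1;
}

int main(void)
{
        const int max = 1000;

        for (int attempt = 0; attempt < max; attempt++) {
                if (try_scan(attempt, max)) {
                        printf("found a slot on attempt %d\n", attempt);
                        break;
                }
                /* Yield inside the retry loop so a long scan cannot
                 * monopolize the CPU (cond_resched() in the kernel). */
                sched_yield();
        }
        return 0;
}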
@@ -2637,17 +2637,14 @@ static void __vunmap(const void *addr, int deallocate_pages)
 	vm_remove_mappings(area, deallocate_pages);
 
 	if (deallocate_pages) {
+		unsigned int page_order = vm_area_page_order(area);
 		int i;
 
-		for (i = 0; i < area->nr_pages; i++) {
+		for (i = 0; i < area->nr_pages; i += 1U << page_order) {
 			struct page *page = area->pages[i];
 
 			BUG_ON(!page);
-			/*
-			 * High-order allocs for huge vmallocs are split, so
-			 * can be freed as an array of order-0 allocations
-			 */
-			__free_pages(page, 0);
+			__free_pages(page, page_order);
 		}
 		atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
@@ -2927,7 +2924,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		struct page *page;
 		int p;
 
-		page = alloc_pages_node(node, gfp_mask, page_order);
+		/* Compound pages required for remap_vmalloc_page */
+		page = alloc_pages_node(node, gfp_mask | __GFP_COMP, page_order);
 		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vfree() */
 			area->nr_pages = i;
@@ -2939,16 +2937,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			goto fail;
 		}
 
-		/*
-		 * Higher order allocations must be able to be treated as
-		 * indepdenent small pages by callers (as they can with
-		 * small-page vmallocs). Some drivers do their own refcounting
-		 * on vmalloc_to_page() pages, some use page->mapping,
-		 * page->lru, etc.
-		 */
-		if (page_order)
-			split_page(page, page_order);
-
 		for (p = 0; p < (1U << page_order); p++)
 			area->pages[i + p] = page + p;
...
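These vmalloc hunks appear to switch this tree back to allocating huge vmalloc backing memory as compound pages (__GFP_COMP) and freeing each high-order allocation as a single order-N unit, rather than splitting it into order-0 pages. The freeing loop therefore walks area->pages in strides of 1 << page_order and only touches the head of each block. A small userspace sketch of that indexing, with plain integers standing in for struct page pointers:

#include <stdio.h>

int main(void)
{
        const unsigned int page_order = 2;   /* 4 small pages per allocation */
        const unsigned int nr_pages = 16;    /* total small-page slots */
        int pages[16];

        /* The allocator records every small page of each high-order block,
         * so consecutive slots belong to the same allocation. */
        for (unsigned int i = 0; i < nr_pages; i += 1U << page_order)
                for (unsigned int p = 0; p < (1U << page_order); p++)
                        pages[i + p] = i;    /* head index of the block */

        /* Freeing only needs the head of each block to release it as one unit. */
        for (unsigned int i = 0; i < nr_pages; i += 1U << page_order)
                printf("free block starting at slot %u (order %u)\n",
                       pages[i], page_order);
        return 0;
}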
@@ -4329,17 +4329,19 @@ int kswapd_run(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
 	int ret = 0;
+	struct task_struct *t;
 
-	if (pgdat->kswapd)
+	if (READ_ONCE(pgdat->kswapd))
 		return 0;
 
-	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
-	if (IS_ERR(pgdat->kswapd)) {
+	t = kthread_run(kswapd, pgdat, "kswapd%d", nid);
+	if (IS_ERR(t)) {
 		/* failure at boot is fatal */
 		BUG_ON(system_state < SYSTEM_RUNNING);
 		pr_err("Failed to start kswapd on node %d\n", nid);
-		ret = PTR_ERR(pgdat->kswapd);
-		pgdat->kswapd = NULL;
+		ret = PTR_ERR(t);
+	} else {
+		WRITE_ONCE(pgdat->kswapd, t);
 	}
 	return ret;
 }
@@ -4350,11 +4352,11 @@ int kswapd_run(int nid)
  */
 void kswapd_stop(int nid)
 {
-	struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
+	struct task_struct *kswapd = READ_ONCE(NODE_DATA(nid)->kswapd);
 
 	if (kswapd) {
 		kthread_stop(kswapd);
-		NODE_DATA(nid)->kswapd = NULL;
+		WRITE_ONCE(NODE_DATA(nid)->kswapd, NULL);
 	}
 }
...
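kswapd_run() and kswapd_stop() now publish and clear pgdat->kswapd with WRITE_ONCE() and read it with READ_ONCE(), and the error path never stores an ERR_PTR into the shared field: the task pointer is built in a local variable and only published once it is valid. A userspace approximation of that publish/clear lifecycle using C11 atomics; the kthread type and helper names below are illustrative:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct kthread { int id; };

static _Atomic(struct kthread *) node_kswapd;   /* shared, read by other threads */

static int kswapd_run(int nid)
{
        struct kthread *t;

        if (atomic_load_explicit(&node_kswapd, memory_order_relaxed))
                return 0;                        /* already running */

        t = malloc(sizeof(*t));
        if (!t)
                return -1;                       /* nothing half-built is published */
        t->id = nid;

        /* Publish only a fully valid pointer. */
        atomic_store_explicit(&node_kswapd, t, memory_order_release);
        return 0;
}

static void kswapd_stop(void)
{
        struct kthread *t = atomic_exchange_explicit(&node_kswapd, NULL,
                                                     memory_order_acq_rel);

        free(t);                                 /* kthread_stop() analogue */
}

int main(void)
{
        kswapd_run(0);
        kswapd_stop();
        printf("done\n");
        return 0;
}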
@@ -273,6 +273,9 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
 
+	if (!len)
+		return 0;
+
 	psock = sk_psock_get(sk);
 	if (unlikely(!psock))
 		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
...
@@ -12,6 +12,7 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/refcount.h>
+#include <linux/rcupdate.h>
 #include <net/act_api.h>
 #include <net/netlink.h>
 #include <net/pkt_cls.h>
@@ -338,6 +339,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	struct tcf_result cr = {};
 	int err, balloc = 0;
 	struct tcf_exts e;
+	bool update_h = false;
 
 	err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 	if (err < 0)
@@ -455,10 +457,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		}
 	}
 
-	if (cp->perfect)
+	if (cp->perfect) {
 		r = cp->perfect + handle;
-	else
-		r = tcindex_lookup(cp, handle) ? : &new_filter_result;
+	} else {
+		/* imperfect area is updated in-place using rcu */
+		update_h = !!tcindex_lookup(cp, handle);
+		r = &new_filter_result;
+	}
 
 	if (r == &new_filter_result) {
 		f = kzalloc(sizeof(*f), GFP_KERNEL);
@@ -492,7 +497,28 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	rcu_assign_pointer(tp->root, cp);
 
-	if (r == &new_filter_result) {
+	if (update_h) {
+		struct tcindex_filter __rcu **fp;
+		struct tcindex_filter *cf;
+
+		f->result.res = r->res;
+		tcf_exts_change(&f->result.exts, &r->exts);
+
+		/* imperfect area bucket */
+		fp = cp->h + (handle % cp->hash);
+
+		/* lookup the filter, guaranteed to exist */
+		for (cf = rcu_dereference_bh_rtnl(*fp); cf;
+		     fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp))
+			if (cf->key == (u16)handle)
+				break;
+
+		f->next = cf->next;
+
+		cf = rcu_replace_pointer(*fp, f, 1);
+		tcf_exts_get_net(&cf->result.exts);
+		tcf_queue_work(&cf->rwork, tcindex_destroy_fexts_work);
+	} else if (r == &new_filter_result) {
 		struct tcindex_filter *nfp;
 		struct tcindex_filter __rcu **fp;
...
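This tcindex change (it appears to correspond to the CVE-2023-1281 fix) stops modifying an existing imperfect-hash filter in place while readers may still be using it: a replacement filter is built first, then spliced into the hash bucket with rcu_replace_pointer(), and the old filter is queued for deferred destruction. A simplified single-writer sketch of that replace-and-defer pattern, with C11 atomics standing in for the RCU primitives; real RCU also delays the free until all readers are done, which the plain free() below does not capture:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct filter {
        unsigned short key;
        int result;
        _Atomic(struct filter *) next;
};

/* One hash bucket: a singly linked list updated by a single writer. */
static _Atomic(struct filter *) bucket;

static void replace_filter(unsigned short key, int new_result)
{
        _Atomic(struct filter *) *fp = &bucket;
        struct filter *cf, *nf;

        /* Walk to the node with the matching key. */
        for (cf = atomic_load(fp); cf && cf->key != key;
             fp = &cf->next, cf = atomic_load(fp))
                ;
        if (!cf)
                return;

        nf = calloc(1, sizeof(*nf));
        if (!nf)
                return;
        nf->key = key;
        nf->result = new_result;
        /* Link the replacement to the old node's successor first ... */
        atomic_store(&nf->next, atomic_load(&cf->next));
        /* ... then swing the bucket link over in one store, so a lookup
         * always sees either the complete old filter or the new one. */
        atomic_store(fp, nf);

        /* The kernel defers this via tcf_queue_work()/an RCU grace period. */
        free(cf);
}

int main(void)
{
        struct filter *f = calloc(1, sizeof(*f));

        if (!f)
                return 1;
        f->key = 7;
        f->result = 1;
        atomic_store(&bucket, f);
        replace_filter(7, 2);
        printf("bucket result is now %d\n", atomic_load(&bucket)->result);
        return 0;
}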
@@ -386,13 +386,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 			rc = -EINVAL;
 			goto out;
 		}
-		lock_sock(sk);
 		memcpy(crypto_info_aes_gcm_128->iv,
 		       cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
 		       TLS_CIPHER_AES_GCM_128_IV_SIZE);
 		memcpy(crypto_info_aes_gcm_128->rec_seq, cctx->rec_seq,
 		       TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
-		release_sock(sk);
 		if (copy_to_user(optval,
 				 crypto_info_aes_gcm_128,
 				 sizeof(*crypto_info_aes_gcm_128)))
@@ -410,13 +408,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 			rc = -EINVAL;
 			goto out;
 		}
-		lock_sock(sk);
 		memcpy(crypto_info_aes_gcm_256->iv,
 		       cctx->iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE,
 		       TLS_CIPHER_AES_GCM_256_IV_SIZE);
 		memcpy(crypto_info_aes_gcm_256->rec_seq, cctx->rec_seq,
 		       TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE);
-		release_sock(sk);
 		if (copy_to_user(optval,
 				 crypto_info_aes_gcm_256,
 				 sizeof(*crypto_info_aes_gcm_256)))
@@ -436,6 +432,8 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
 {
 	int rc = 0;
 
+	lock_sock(sk);
+
 	switch (optname) {
 	case TLS_TX:
 	case TLS_RX:
@@ -446,6 +444,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
 		rc = -ENOPROTOOPT;
 		break;
 	}
+
+	release_sock(sk);
+
 	return rc;
 }
...
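The TLS hunks (they appear to correspond to the CVE-2023-28466 fix) hoist lock_sock()/release_sock() out of the per-cipher branches in do_tls_getsockopt_conf() and into do_tls_getsockopt(), so every optname, including paths that previously read crypto state without the lock, runs under the socket lock exactly once. A small pthread sketch of hoisting a lock from the branches into the single entry point; all names and option values below are illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t sock_lock = PTHREAD_MUTEX_INITIALIZER;
static int tx_state, rx_state;

/* Branch helpers assume the caller already holds the lock. */
static int get_tx(void) { return tx_state; }
static int get_rx(void) { return rx_state; }

static int do_getsockopt(int optname, int *out)
{
        int rc = 0;

        pthread_mutex_lock(&sock_lock);      /* one lock for every branch */
        switch (optname) {
        case 0:
                *out = get_tx();
                break;
        case 1:
                *out = get_rx();
                break;
        default:
                rc = -1;                     /* error path is covered too */
                break;
        }
        pthread_mutex_unlock(&sock_lock);
        return rc;
}

int main(void)
{
        int v;

        printf("rc=%d\n", do_getsockopt(0, &v));
        printf("rc=%d\n", do_getsockopt(9, &v));
        return 0;
}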
@@ -113,14 +113,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
 	return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql);
 }
 
-static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb)
+static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb,
+			    struct user_namespace *user_ns)
 {
-	uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk));
+	uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk));
 
 	return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid);
 }
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
-		u32 portid, u32 seq, u32 flags, int sk_ino)
+		struct user_namespace *user_ns,
+		u32 portid, u32 seq, u32 flags, int sk_ino)
 {
 	struct nlmsghdr *nlh;
 	struct unix_diag_msg *rep;
@@ -166,7 +168,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 		goto out_nlmsg_trim;
 
 	if ((req->udiag_show & UDIAG_SHOW_UID) &&
-	    sk_diag_dump_uid(sk, skb))
+	    sk_diag_dump_uid(sk, skb, user_ns))
 		goto out_nlmsg_trim;
 
 	nlmsg_end(skb, nlh);
@@ -178,7 +180,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 }
 
 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
-		u32 portid, u32 seq, u32 flags)
+		struct user_namespace *user_ns,
+		u32 portid, u32 seq, u32 flags)
 {
 	int sk_ino;
@@ -189,7 +192,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	if (!sk_ino)
 		return 0;
 
-	return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino);
+	return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino);
 }
 
 static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -217,7 +220,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				goto next;
 			if (!(req->udiag_states & (1 << sk->sk_state)))
 				goto next;
-			if (sk_diag_dump(sk, skb, req,
+			if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk),
 					 NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq,
 					 NLM_F_MULTI) < 0)
@@ -285,7 +288,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
 	if (!rep)
 		goto out;
 
-	err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid,
+	err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk),
+			   NETLINK_CB(in_skb).portid,
 			   nlh->nlmsg_seq, 0, req->udiag_ino);
 	if (err < 0) {
 		nlmsg_free(rep);
...
@@ -133,7 +133,9 @@ int ima_store_template(struct ima_template_entry *entry,
 	entry->pcr = pcr;
 	result = ima_add_template_entry(entry, violation, op, inode, filename);
-	if (!result && duplicated_entry) {
+	if (result) {
+		kfree(duplicated_entry);
+	} else if (duplicated_entry) {
 		result = ima_add_template_entry(duplicated_entry, violation, op,
 						inode, filename);
 		if (result < 0)
...
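The IMA hunk frees duplicated_entry when adding the primary entry fails; previously that path returned without releasing it, leaking the pre-allocated duplicate. The general shape of the fix as a userspace sketch; the entry type and helper names are illustrative, and what happens when the second insert fails is simplified here:

#include <stdio.h>
#include <stdlib.h>

struct entry { int data; };

/* Pretend the primary insert fails. */
static int add_entry(struct entry *e) { (void)e; return -1; }

static int store_template(struct entry *e, struct entry *dup)
{
        int result = add_entry(e);

        if (result) {
                /* Primary insert failed: the duplicate will never be used,
                 * so release it here instead of leaking it. */
                free(dup);
        } else if (dup) {
                result = add_entry(dup);
                if (result < 0)
                        free(dup);
        }
        return result;
}

int main(void)
{
        struct entry *e = calloc(1, sizeof(*e));
        struct entry *dup = calloc(1, sizeof(*dup));

        printf("store returned %d\n", store_template(e, dup));
        free(e);
        return 0;
}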