Commit 0bb20a8f authored by Kemeng Shi, committed by Zheng Zengkai

etmem: fix potential UAF when walking ept page table

euleros inclusion
category: feature
feature: etmem
bugzilla: https://gitee.com/openeuler/kernel/issues/I4OODH?from=project-issue
CVE: NA

-------------------------------------------------

A KVM shadow page may be freed while etmem_scan is walking the EPT page
table, leading to a use-after-free. Hold mmu_lock across the EPT walk to
prevent this. To avoid holding mmu_lock for too long, add a walk_step
module parameter that bounds how many pages are scanned per lock hold.
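
In outline: take mmu_lock, scan at most walk_step pages of the EPT table,
drop the lock, then resume from the recorded restart address. A minimal
sketch of that pattern follows; scan_chunk() is a hypothetical stand-in for
ept_page_range()/arm_page_range(), and the real patch resumes from
pic->restart_gpa and may release the lock inside vm_walk_host_range instead:

static unsigned int walk_step = 512;    /* pages scanned per mmu_lock hold */

static int walk_in_steps(struct kvm *kvm, unsigned long gpa,
                         unsigned long gpa_end)
{
        int ret = 0;

        while (gpa < gpa_end) {
                /* scan at most walk_step pages under the lock */
                unsigned long next = min(gpa_end,
                                         gpa + walk_step * PAGE_SIZE);

                spin_lock_irq(&kvm->mmu_lock);
                ret = scan_chunk(kvm, gpa, next);       /* hypothetical walker */
                spin_unlock_irq(&kvm->mmu_lock);

                if (ret)
                        break;
                gpa = next;     /* the real patch resumes at pic->restart_gpa */
        }
        return ret;
}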
Signed-off-by: Kemeng Shi <shikemeng@huawei.com>
Reviewed-by: louhongxiang <louhongxiang@huawei.com>
Reviewed-by: Chen Wandun <chenwandun@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent 22773910
@@ -84,6 +84,28 @@
 #endif
+
+#define RET_RESCAN_FLAG 0x10000
+
+static int set_walk_step(const char *val, const struct kernel_param *kp)
+{
+        int ret;
+        unsigned int n;
+
+        ret = kstrtouint(val, 0, &n);
+        if (ret != 0 || n == 0)
+                return -EINVAL;
+
+        return param_set_uint(val, kp);
+}
+
+static struct kernel_param_ops walk_step_ops = {
+        .set = set_walk_step,
+        .get = param_get_uint,
+};
+
+static unsigned int __read_mostly walk_step = 512; // in PAGE_SIZE
+module_param_cb(walk_step, &walk_step_ops, &walk_step, 0644);
 
 static unsigned long pagetype_size[16] = {
         [PTE_ACCESSED]  = PAGE_SIZE,    /* 4k page */
         [PMD_ACCESSED]  = PMD_SIZE,     /* 2M page */

@@ -249,26 +271,13 @@ static int page_idle_copy_user(struct page_idle_ctrl *pic,
                                unsigned long start, unsigned long end)
 {
         int bytes_read;
-        int lc = 0;     /* last copy? */
         int ret;
 
         dump_pic(pic);
 
-        /* Break out of loop on no more progress. */
-        if (!pic->pie_read) {
-                lc = 1;
-                if (start < end)
-                        start = end;
-        }
-
-        if (start >= end && start > pic->next_hva) {
-                set_next_hva(start, "TAIL-HOLE");
-                pic_report_addr(pic, start);
-        }
-
         bytes_read = pic->pie_read;
         if (!bytes_read)
-                return 1;
+                return 0;
 
         ret = copy_to_user(pic->buf, pic->kpie, bytes_read);
         if (ret)

@@ -278,8 +287,6 @@ static int page_idle_copy_user(struct page_idle_ctrl *pic,
         pic->bytes_copied += bytes_read;
         if (pic->bytes_copied >= pic->buf_size)
                 return PAGE_IDLE_BUF_FULL;
-        if (lc)
-                return lc;
 
         ret = init_page_idle_ctrl_buffer(pic);
         if (ret)

@@ -299,17 +306,24 @@ static int vm_walk_host_range(unsigned long long start,
         unsigned long tmp_gpa_to_hva = pic->gpa_to_hva;
 
         pic->gpa_to_hva = 0;
-        local_irq_enable();
+        spin_unlock_irq(&pic->kvm->mmu_lock);
         down_read(&walk->mm->mmap_lock);
         local_irq_disable();
         ret = walk_page_range(walk->mm, start + tmp_gpa_to_hva, end + tmp_gpa_to_hva,
                         walk->ops, walk->private);
+        local_irq_enable();
         up_read(&walk->mm->mmap_lock);
         pic->gpa_to_hva = tmp_gpa_to_hva;
         if (pic->flags & VM_SCAN_HOST) {
                 pic->restart_gpa -= tmp_gpa_to_hva;
                 pic->flags &= ~VM_SCAN_HOST;
         }
+
+        if (ret != PAGE_IDLE_KBUF_FULL && end > pic->restart_gpa)
+                pic->restart_gpa = end;
+
+        /* ept page table may change after spin_unlock, rescan vm from root ept */
+        ret |= RET_RESCAN_FLAG;
         return ret;
 }

@@ -515,30 +529,40 @@ static int ept_page_range(struct page_idle_ctrl *pic,
         WARN_ON(addr >= end);
 
-        spin_lock(&pic->kvm->mmu_lock);
+        spin_lock_irq(&pic->kvm->mmu_lock);
         vcpu = kvm_get_vcpu(pic->kvm, 0);
         if (!vcpu) {
-                spin_unlock(&pic->kvm->mmu_lock);
+                pic->gpa_to_hva = 0;
+                set_restart_gpa(TASK_SIZE, "NO-VCPU");
+                spin_unlock_irq(&pic->kvm->mmu_lock);
                 return -EINVAL;
         }
 
         mmu = kvm_arch_mmu_pointer(vcpu);
         if (!VALID_PAGE(mmu->root_hpa)) {
-                spin_unlock(&pic->kvm->mmu_lock);
+                pic->gpa_to_hva = 0;
+                set_restart_gpa(TASK_SIZE, "NO-HPA");
+                spin_unlock_irq(&pic->kvm->mmu_lock);
                 return -EINVAL;
         }
 
         ept_root = __va(mmu->root_hpa);
-        spin_unlock(&pic->kvm->mmu_lock);
 
-        local_irq_disable();
         /* Walk start at p4d when vm has 4 level table pages */
         if (mmu->shadow_root_level != 4)
                 err = ept_pgd_range(pic, (pgd_t *)ept_root, addr, end, walk);
         else
                 err = ept_p4d_range(pic, (p4d_t *)ept_root, addr, end, walk);
-        local_irq_enable();
+
+        /* mmu_lock is unlock in vm_walk_host_range which will unlock mmu_lock
+         * and RET_RESCAN_FLAG will be set in ret value
+         */
+        if (!(err & RET_RESCAN_FLAG))
+                spin_unlock_irq(&pic->kvm->mmu_lock);
+        else
+                err &= ~RET_RESCAN_FLAG;
 
         return err;
 }

@@ -807,6 +831,8 @@ static int vm_idle_walk_hva_range(struct page_idle_ctrl *pic,
                               struct mm_walk *walk)
 {
         unsigned long gpa_addr;
+        unsigned long gpa_next;
+        unsigned long gpa_end;
         unsigned long addr_range;
         unsigned long va_end;
         int ret;

@@ -836,12 +862,20 @@ static int vm_idle_walk_hva_range(struct page_idle_ctrl *pic,
                 }
         } else {
                 pic->gpa_to_hva = start - gpa_addr;
+                gpa_end = gpa_addr + addr_range;
+                for (; gpa_addr < gpa_end;) {
+                        gpa_next = min(gpa_end, gpa_addr + walk_step * PAGE_SIZE);
 #ifdef CONFIG_ARM64
-                arm_page_range(pic, gpa_addr, gpa_addr + addr_range);
+                        ret = arm_page_range(pic, gpa_addr, gpa_next);
 #else
-                ept_page_range(pic, gpa_addr, gpa_addr + addr_range, walk);
+                        ret = ept_page_range(pic, gpa_addr, gpa_next, walk);
 #endif
-                va_end = pic->gpa_to_hva + gpa_addr + addr_range;
+                        gpa_addr = pic->restart_gpa;
+                        if (ret)
+                                break;
+                }
+
+                va_end = pic->gpa_to_hva + gpa_end;
         }
 
         start = pic->restart_gpa + pic->gpa_to_hva;

@@ -850,6 +884,9 @@ static int vm_idle_walk_hva_range(struct page_idle_ctrl *pic,
                         break;
         }
 
+        if (start > pic->next_hva)
+                set_next_hva(start, "NEXT-START");
+
         if (pic->bytes_copied)
                 ret = 0;
         return ret;

@@ -1050,9 +1087,10 @@ static int mm_idle_pmd_entry(pmd_t *pmd, unsigned long addr,
          * Skip duplicate PMD_IDLE_PTES: when the PMD crosses VMA boundary,
          * walk_page_range() can call on the same PMD twice.
          */
-        if ((addr & PMD_MASK) == (pic->last_va & PMD_MASK)) {
+        if ((addr & PMD_MASK) == (pic->last_va & PMD_MASK) && (pic->flags & SCAN_HUGE_PAGE)) {
                 debug_printk("ignore duplicate addr %pK %pK\n",
                              addr, pic->last_va);
+                set_restart_gpa(round_up(next, PMD_SIZE), "DUP_ADDR");
                 return 0;
         }
         pic->last_va = addr;

@@ -1144,12 +1182,17 @@ static int mm_idle_walk_range(struct page_idle_ctrl *pic,
                 up_read(&walk->mm->mmap_lock);
                 WARN_ONCE(pic->gpa_to_hva, "non-zero gpa_to_hva");
 
+                if (ret != PAGE_IDLE_KBUF_FULL && end > pic->restart_gpa)
+                        pic->restart_gpa = end;
+
                 start = pic->restart_gpa;
                 ret = page_idle_copy_user(pic, start, end);
                 if (ret)
                         break;
         }
 
+        if (start > pic->next_hva)
+                set_next_hva(start, "NEXT-START");
+
         if (pic->bytes_copied) {
                 if (ret != PAGE_IDLE_BUF_FULL && pic->next_hva < end)
                         debug_printk("partial scan: next_hva=%pK end=%pK\n",