mmu_audit.c 6.5 KB
Newer Older
1 2 3 4 5 6
/*
 * mmu_audit.c:
 *
 * Audit code for KVM MMU
 *
 * Copyright (C) 2006 Qumranet, Inc.
N
Nicolas Kaiser 已提交
7
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
8 9 10 11 12 13 14 15 16 17 18 19
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *   Marcelo Tosatti <mtosatti@redhat.com>
 *   Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

20 21
#include <linux/ratelimit.h>

22 23 24 25 26 27 28 29 30
/* Human-readable names for kvm->arch.audit_point, indexed by AUDIT_* value. */
char const *audit_point_name[] = {
	"pre page fault",
	"post page fault",
	"pre pte write",
	"post pte write",
	"pre sync",
	"post sync"
};

31
/*
 * Print an audit error message prefixed with the name of the audit
 * point (see audit_point_name[]) the vcpu's kvm is currently at.
 */
#define audit_printk(kvm, fmt, args...)		\
	printk(KERN_ERR "audit: (%s) error: "	\
		fmt, audit_point_name[kvm->arch.audit_point], ##args)

X
Xiao Guangrong 已提交
35
/* Callback invoked on every spte visited by __mmu_spte_walk(). */
typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);
36

X
Xiao Guangrong 已提交
37 38
/*
 * Recursively visit every spte of the shadow page @sp: call @fn on each
 * entry, and for entries that are present but not leaf sptes descend
 * into the child shadow page one level down.
 */
static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			    inspect_spte_fn fn, int level)
{
	u64 *ent = sp->spt;	/* loop-invariant: hoisted out of the loop */
	int i;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		fn(vcpu, ent + i, level);

		if (is_shadow_present_pte(ent[i]) &&
		      !is_last_spte(ent[i], level)) {
			struct kvm_mmu_page *child;

			child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
			__mmu_spte_walk(vcpu, child, fn, level - 1);
		}
	}
}

/*
 * Walk all sptes reachable from the vcpu's current mmu root(s), calling
 * @fn on each.  Handles both the single 64-bit root and the four PAE
 * roots; does nothing when no root is loaded.
 */
static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
{
	int i;
	struct kvm_mmu_page *sp;

	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
		return;

	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
		hpa_t root = vcpu->arch.mmu.root_hpa;

		sp = page_header(root);
		__mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
		return;
	}

	/* PAE paging: four page-directory roots at level 2. */
	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->arch.mmu.pae_root[i];

		if (root && VALID_PAGE(root)) {
			root &= PT64_BASE_ADDR_MASK;
			sp = page_header(root);
			__mmu_spte_walk(vcpu, sp, fn, 2);
		}
	}
}

X
Xiao Guangrong 已提交
86 87 88 89 90 91 92 93 94 95
/* Callback invoked on every active shadow page by walk_all_active_sps(). */
typedef void (*sp_handler) (struct kvm *kvm, struct kvm_mmu_page *sp);

/* Apply @fn to every shadow page on the VM's active list. */
static void walk_all_active_sps(struct kvm *kvm, sp_handler fn)
{
	struct kvm_mmu_page *sp;

	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link)
		fn(kvm, sp);
}

X
Xiao Guangrong 已提交
96
/*
 * Check that a leaf spte maps the host physical page currently backing
 * its gfn.  An unsync shadow page is only legal at the last level;
 * mismatched mappings are reported via audit_printk().
 */
static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp;
	gfn_t gfn;
	pfn_t pfn;
	hpa_t hpa;

	sp = page_header(__pa(sptep));

	if (sp->unsync) {
		if (level != PT_PAGE_TABLE_LEVEL) {
			audit_printk(vcpu->kvm, "unsync sp: %p "
				     "level = %d\n", sp, level);
			return;
		}
	}

	/* Only present leaf sptes map real pages worth checking. */
	if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
		return;

	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
	pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);

	if (is_error_pfn(pfn)) {
		kvm_release_pfn_clean(pfn);
		return;
	}

	hpa = pfn << PAGE_SHIFT;
	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
		/* fixed broken format string: "%llxn" lacked the \n escape */
		audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
			     "ent %llx\n", vcpu->arch.mmu.root_level, pfn,
			     hpa, *sptep);
}

X
Xiao Guangrong 已提交
131
/*
 * Verify that @sptep is reachable through the rmap of the gfn it maps;
 * complain (ratelimited) when the memslot or the rmap entry is missing.
 */
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
	unsigned long *rmapp;
	struct kvm_mmu_page *rev_sp;
	gfn_t gfn;

	rev_sp = page_header(__pa(sptep));
	gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);

	if (!gfn_to_memslot(kvm, gfn)) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
		audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
		       (long int)(sptep - rev_sp->spt), rev_sp->gfn);
		dump_stack();
		return;
	}

	rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
	if (!*rmapp) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no rmap for writable spte %llx\n",
			     *sptep);
		dump_stack();
	}
}

X
Xiao Guangrong 已提交
161
/* Every present leaf spte must be reachable through its gfn's rmap. */
static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep, level))
		inspect_spte_has_rmap(vcpu->kvm, sptep);
}

167 168 169 170
/* After a root sync (AUDIT_POST_SYNC) no shadow page may still be unsync. */
static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp = page_header(__pa(sptep));

	if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync)
		audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync "
			     "root.\n", sp);
}

X
Xiao Guangrong 已提交
176
static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
177 178 179
{
	int i;

X
Xiao Guangrong 已提交
180 181
	if (sp->role.level != PT_PAGE_TABLE_LEVEL)
		return;
182

X
Xiao Guangrong 已提交
183 184
	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		if (!is_rmap_spte(sp->spt[i]))
185 186
			continue;

X
Xiao Guangrong 已提交
187
		inspect_spte_has_rmap(kvm, sp->spt + i);
188 189 190
	}
}

191
static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
192 193 194 195 196
{
	struct kvm_memory_slot *slot;
	unsigned long *rmapp;
	u64 *spte;

X
Xiao Guangrong 已提交
197 198
	if (sp->role.direct || sp->unsync || sp->role.invalid)
		return;
199

X
Xiao Guangrong 已提交
200 201
	slot = gfn_to_memslot(kvm, sp->gfn);
	rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
202

203
	spte = rmap_next(rmapp, NULL);
X
Xiao Guangrong 已提交
204 205
	while (spte) {
		if (is_writable_pte(*spte))
206 207 208
			audit_printk(kvm, "shadow page has writable "
				     "mappings: gfn %llx role %x\n",
				     sp->gfn, sp->role.word);
209
		spte = rmap_next(rmapp, spte);
210 211 212
	}
}

X
Xiao Guangrong 已提交
213 214 215 216 217 218 219 220 221 222 223
/* Per-shadow-page checks: rmap consistency and write protection. */
static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	check_mappings_rmap(kvm, sp);
	audit_write_protection(kvm, sp);
}

/* Run the per-page audits over every active shadow page. */
static void audit_all_active_sps(struct kvm *kvm)
{
	walk_all_active_sps(kvm, audit_sp);
}

X
Xiao Guangrong 已提交
224 225 226 227
/* All per-spte checks bundled into one walker callback. */
static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	audit_sptes_have_rmaps(vcpu, sptep, level);
	audit_mappings(vcpu, sptep, level);
	audit_spte_after_sync(vcpu, sptep, level);
}

/* Run the per-spte audits over every spte reachable from the roots. */
static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
{
	mmu_spte_walk(vcpu, audit_spte);
}

236
static bool mmu_audit;
237
static struct static_key mmu_audit_key;
238

239
static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
240
{
241 242
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);

243 244
	if (!__ratelimit(&ratelimit_state))
		return;
245

246 247 248 249 250 251 252
	vcpu->kvm->arch.audit_point = point;
	audit_all_active_sps(vcpu->kvm);
	audit_vcpu_spte(vcpu);
}

static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
253
	if (static_key_false((&mmu_audit_key)))
254
		__kvm_mmu_audit(vcpu, point);
255 256 257 258 259 260 261
}

static void mmu_audit_enable(void)
{
	if (mmu_audit)
		return;

262
	static_key_slow_inc(&mmu_audit_key);
263 264 265 266 267 268 269 270
	mmu_audit = true;
}

static void mmu_audit_disable(void)
{
	if (!mmu_audit)
		return;

271
	static_key_slow_dec(&mmu_audit_key);
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
	mmu_audit = false;
}

/*
 * "mmu_audit" module parameter setter: "0" disables, "1" enables,
 * anything else is rejected with -EINVAL.
 */
static int mmu_audit_set(const char *val, const struct kernel_param *kp)
{
	unsigned long enable;
	int ret = strict_strtoul(val, 10, &enable);

	if (ret < 0)
		return -EINVAL;

	if (enable == 0)
		mmu_audit_disable();
	else if (enable == 1)
		mmu_audit_enable();
	else
		return -EINVAL;

	return 0;
}

/* "mmu_audit" parameter ops: custom setter toggles auditing, read as bool. */
static struct kernel_param_ops audit_param_ops = {
	.set = mmu_audit_set,
	.get = param_get_bool,
};

module_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);