kvm_trace.c 6.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
/*
 * kvm trace
 *
 * It is designed to allow debugging traces of kvm to be generated
 * on UP / SMP machines.  Each trace entry can be timestamped so that
 * it's possible to reconstruct a chronological record of trace events.
 * The implementation refers to blktrace kernel support.
 *
 * Copyright (c) 2008 Intel Corporation
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
 *
 * Date:    Feb 2008
 */

#include <linux/module.h>
#include <linux/relay.h>
#include <linux/debugfs.h>
20
#include <linux/ktime.h>
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38

#include <linux/kvm_host.h>

#define KVM_TRACE_STATE_RUNNING 	(1 << 0)
#define KVM_TRACE_STATE_PAUSE 		(1 << 1)
#define KVM_TRACE_STATE_CLEARUP 	(1 << 2)

struct kvm_trace {
	int trace_state;
	struct rchan *rchan;
	struct dentry *lost_file;
	atomic_t lost_records;
};
static struct kvm_trace *kvm_trace;

struct kvm_trace_probe {
	const char *name;
	const char *format;
39
	u32 timestamp_in;
40 41 42
	marker_probe_func *probe_func;
};

43
static inline int calc_rec_size(int timestamp, int extra)
44 45 46 47
{
	int rec_size = KVM_TRC_HEAD_SIZE;

	rec_size += extra;
48
	return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
49 50 51 52 53 54 55 56 57
}

static void kvm_add_trace(void *probe_private, void *call_data,
			  const char *format, va_list *args)
{
	struct kvm_trace_probe *p = probe_private;
	struct kvm_trace *kt = kvm_trace;
	struct kvm_trace_rec rec;
	struct kvm_vcpu *vcpu;
58 59
	int    i, size;
	u32    extra;
60 61 62 63

	if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
		return;

64
	rec.rec_val	= TRACE_REC_EVENT_ID(va_arg(*args, u32));
65 66 67 68 69 70 71 72
	vcpu		= va_arg(*args, struct kvm_vcpu *);
	rec.pid		= current->tgid;
	rec.vcpu_id	= vcpu->vcpu_id;

	extra   	= va_arg(*args, u32);
	WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
	extra 		= min_t(u32, extra, KVM_TRC_EXTRA_MAX);

73
	rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
74
			| TRACE_REC_NUM_DATA_ARGS(extra);
75

76 77
	if (p->timestamp_in) {
		rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());
78

79
		for (i = 0; i < extra; i++)
80
			rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
81
	} else {
82
		for (i = 0; i < extra; i++)
83
			rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
84 85
	}

86
	size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
	relay_write(kt->rchan, &rec, size);
}

static struct kvm_trace_probe kvm_trace_probes[] = {
	{ "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
	{ "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
};

static int lost_records_get(void *data, u64 *val)
{
	struct kvm_trace *kt = data;

	*val = atomic_read(&kt->lost_records);
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");

/*
 *  The relay channel is used in "no-overwrite" mode, it keeps trace of how
 *  many times we encountered a full subbuffer, to tell user space app the
 *  lost records there were.
 */
static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
				     void *prev_subbuf, size_t prev_padding)
{
	struct kvm_trace *kt;

T
Tan, Li 已提交
115 116 117 118 119 120 121 122 123 124
	if (!relay_buf_full(buf)) {
		if (!prev_subbuf) {
			/*
			 * executed only once when the channel is opened
			 * save metadata as first record
			 */
			subbuf_start_reserve(buf, sizeof(u32));
			*(u32 *)subbuf = 0x12345678;
		}

125
		return 1;
T
Tan, Li 已提交
126
	}
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169

	kt = buf->chan->private_data;
	atomic_inc(&kt->lost_records);

	return 0;
}

static struct dentry *kvm_create_buf_file_callack(const char *filename,
						 struct dentry *parent,
						 int mode,
						 struct rchan_buf *buf,
						 int *is_global)
{
	return debugfs_create_file(filename, mode, parent, buf,
				   &relay_file_operations);
}

static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);
	return 0;
}

static struct rchan_callbacks kvm_relay_callbacks = {
	.subbuf_start 		= kvm_subbuf_start_callback,
	.create_buf_file 	= kvm_create_buf_file_callack,
	.remove_buf_file 	= kvm_remove_buf_file_callback,
};

static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
{
	struct kvm_trace *kt;
	int i, r = -ENOMEM;

	if (!kuts->buf_size || !kuts->buf_nr)
		return -EINVAL;

	kt = kzalloc(sizeof(*kt), GFP_KERNEL);
	if (!kt)
		goto err;

	r = -EIO;
	atomic_set(&kt->lost_records, 0);
170
	kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
171 172 173 174
					    kt, &kvm_trace_lost_ops);
	if (!kt->lost_file)
		goto err;

175
	kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
				kuts->buf_nr, &kvm_relay_callbacks, kt);
	if (!kt->rchan)
		goto err;

	kvm_trace = kt;

	for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
		struct kvm_trace_probe *p = &kvm_trace_probes[i];

		r = marker_probe_register(p->name, p->format, p->probe_func, p);
		if (r)
			printk(KERN_INFO "Unable to register probe %s\n",
			       p->name);
	}

	kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;

	return 0;
err:
	if (kt) {
		if (kt->lost_file)
			debugfs_remove(kt->lost_file);
		if (kt->rchan)
			relay_close(kt->rchan);
		kfree(kt);
	}
	return r;
}

static int kvm_trace_enable(char __user *arg)
{
	struct kvm_user_trace_setup kuts;
	int ret;

	ret = copy_from_user(&kuts, arg, sizeof(kuts));
	if (ret)
		return -EFAULT;

	ret = do_kvm_trace_enable(&kuts);
	if (ret)
		return ret;

	return 0;
}

static int kvm_trace_pause(void)
{
	struct kvm_trace *kt = kvm_trace;
	int r = -EINVAL;

	if (kt == NULL)
		return r;

	if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
		kt->trace_state = KVM_TRACE_STATE_PAUSE;
		relay_flush(kt->rchan);
		r = 0;
	}

	return r;
}

void kvm_trace_cleanup(void)
{
	struct kvm_trace *kt = kvm_trace;
	int i;

	if (kt == NULL)
		return;

	if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
	    kt->trace_state == KVM_TRACE_STATE_PAUSE) {

		kt->trace_state = KVM_TRACE_STATE_CLEARUP;

		for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
			struct kvm_trace_probe *p = &kvm_trace_probes[i];
			marker_probe_unregister(p->name, p->probe_func, p);
		}
255
		marker_synchronize_unregister();
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285

		relay_close(kt->rchan);
		debugfs_remove(kt->lost_file);
		kfree(kt);
	}
}

int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	long r = -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (ioctl) {
	case KVM_TRACE_ENABLE:
		r = kvm_trace_enable(argp);
		break;
	case KVM_TRACE_PAUSE:
		r = kvm_trace_pause();
		break;
	case KVM_TRACE_DISABLE:
		r = 0;
		kvm_trace_cleanup();
		break;
	}

	return r;
}