/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include "trace.h"

/**
 * trace_call_bpf - invoke BPF program
 * @prog: BPF program
 * @ctx: opaque context pointer
 *
 * kprobe handlers execute BPF programs via this helper.
 * Can be used from static tracepoints in the future.
 *
 * Return: BPF programs always return an integer which is interpreted by
 * kprobe handler as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1
 */
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
	unsigned int verdict;

	/* Running BPF programs from NMI context is not supported yet. */
	if (in_nmi())
		return 1;

	preempt_disable();

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		/*
		 * Some BPF program (this one or another) is already running
		 * on this CPU.  Do not nest into a second program and do not
		 * send the kprobe event to the ring buffer either: report
		 * zero.
		 */
		verdict = 0;
	} else {
		rcu_read_lock();
		verdict = BPF_PROG_RUN(prog, ctx);
		rcu_read_unlock();
	}

	/* The increment above is undone whether or not the program ran. */
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	return verdict;
}
EXPORT_SYMBOL_GPL(trace_call_bpf);

64
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
65
{
66
	int ret;
67

68 69 70 71 72
	ret = probe_kernel_read(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
73 74 75 76 77 78
}

static const struct bpf_func_proto bpf_probe_read_proto = {
	.func		= bpf_probe_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
79 80
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE,
81 82 83
	.arg3_type	= ARG_ANYTHING,
};

84 85
BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
	   u32, size)
86 87 88 89 90 91 92 93 94 95 96 97 98
{
	/*
	 * Ensure we're in user context which is safe for the helper to
	 * run. This helper has no business in a kthread.
	 *
	 * access_ok() should prevent writing to non-user memory, but in
	 * some situations (nommu, temporary switch, etc) access_ok() does
	 * not provide enough validation, hence the check on KERNEL_DS.
	 */

	if (unlikely(in_interrupt() ||
		     current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
A
Al Viro 已提交
99
	if (unlikely(uaccess_kernel()))
100 101 102 103 104 105 106 107 108 109 110 111
		return -EPERM;
	if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
		return -EPERM;

	return probe_kernel_write(unsafe_ptr, src, size);
}

static const struct bpf_func_proto bpf_probe_write_user_proto = {
	.func		= bpf_probe_write_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
112 113
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
114 115 116 117 118 119 120 121 122 123
};

/*
 * Hand out the bpf_probe_write_user() prototype, logging a rate limited
 * warning that names the requesting task (comm and pid) each time.
 */
static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
	const struct bpf_func_proto *proto = &bpf_probe_write_user_proto;

	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
			    current->comm, task_pid_nr(current));

	return proto;
}

124
/*
125 126
 * Only limited trace_printk() conversion specifiers allowed:
 * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
127
 */
128 129
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
	   u64, arg2, u64, arg3)
130
{
131
	bool str_seen = false;
132 133
	int mod[3] = {};
	int fmt_cnt = 0;
134 135
	u64 unsafe_addr;
	char buf[64];
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
	int i;

	/*
	 * bpf_check()->check_func_arg()->check_stack_boundary()
	 * guarantees that fmt points to bpf program stack,
	 * fmt_size bytes of it were initialized and fmt_size > 0
	 */
	if (fmt[--fmt_size] != 0)
		return -EINVAL;

	/* check format string for allowed specifiers */
	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
			return -EINVAL;

		if (fmt[i] != '%')
			continue;

		if (fmt_cnt >= 3)
			return -EINVAL;

		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
		i++;
		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
162
		} else if (fmt[i] == 'p' || fmt[i] == 's') {
163 164 165 166 167
			mod[fmt_cnt]++;
			i++;
			if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
				return -EINVAL;
			fmt_cnt++;
168 169 170 171 172 173 174 175
			if (fmt[i - 1] == 's') {
				if (str_seen)
					/* allow only one '%s' per fmt string */
					return -EINVAL;
				str_seen = true;

				switch (fmt_cnt) {
				case 1:
176 177
					unsafe_addr = arg1;
					arg1 = (long) buf;
178 179
					break;
				case 2:
180 181
					unsafe_addr = arg2;
					arg2 = (long) buf;
182 183
					break;
				case 3:
184 185
					unsafe_addr = arg3;
					arg3 = (long) buf;
186 187 188 189 190 191 192
					break;
				}
				buf[0] = 0;
				strncpy_from_unsafe(buf,
						    (void *) (long) unsafe_addr,
						    sizeof(buf));
			}
193 194 195 196 197 198 199 200
			continue;
		}

		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		}

201 202
		if (fmt[i] != 'i' && fmt[i] != 'd' &&
		    fmt[i] != 'u' && fmt[i] != 'x')
203 204 205 206
			return -EINVAL;
		fmt_cnt++;
	}

207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
/* Horrid workaround for getting va_list handling working with different
 * argument type combinations generically for 32 and 64 bit archs.
 */
#define __BPF_TP_EMIT()	__BPF_ARG3_TP()
#define __BPF_TP(...)							\
	__trace_printk(1 /* Fake ip will not be printed. */,		\
		       fmt, ##__VA_ARGS__)

#define __BPF_ARG1_TP(...)						\
	((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_TP(arg1, ##__VA_ARGS__)				\
	  : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_TP((long)arg1, ##__VA_ARGS__)			\
	      : __BPF_TP((u32)arg1, ##__VA_ARGS__)))

#define __BPF_ARG2_TP(...)						\
	((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__)				\
	  : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__)		\
	      : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))

#define __BPF_ARG3_TP(...)						\
	((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__)				\
	  : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__)		\
	      : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))

	return __BPF_TP_EMIT();
237 238 239 240 241 242
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
243 244
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
245 246
};

247 248 249 250 251 252 253 254 255 256 257
const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
	/*
	 * this program might be calling bpf_trace_printk,
	 * so allocate per-cpu printk buffers
	 */
	trace_printk_init_buffers();

	return &bpf_trace_printk_proto;
}

258 259 260
static __always_inline int
get_map_perf_counter(struct bpf_map *map, u64 flags,
		     u64 *value, u64 *enabled, u64 *running)
261 262
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
263 264
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
265
	struct bpf_event_entry *ee;
266

267 268 269 270
	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
271 272 273
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

274
	ee = READ_ONCE(array->ptrs[index]);
275
	if (!ee)
276 277
		return -ENOENT;

278 279 280 281 282 283 284 285 286
	return perf_event_read_local(ee->event, value, enabled, running);
}

/*
 * bpf_perf_event_read - read a counter value through a perf event map
 *
 * Return: the counter value on success, or the error reported by
 * get_map_perf_counter().  Both share the same return channel.
 */
BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
	u64 value = 0;
	int err;

	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
	/*
	 * this api is ugly since we miss [-22..-2] range of valid
	 * counter values, but that's uapi
	 */
	if (err)
		return err;
	return value;
}

296
static const struct bpf_func_proto bpf_perf_event_read_proto = {
297
	.func		= bpf_perf_event_read,
298
	.gpl_only	= true,
299 300 301 302 303
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
				   &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

/* Verifier description of the bpf_perf_event_read_value() helper. */
static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
	.func      = bpf_perf_event_read_value,
	.gpl_only  = true,
	.ret_type  = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,		/* map */
	.arg2_type = ARG_ANYTHING,		/* flags */
	.arg3_type = ARG_PTR_TO_UNINIT_MEM,	/* buf */
	.arg4_type = ARG_CONST_SIZE,		/* size */
};

331 332
static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);

333 334 335
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
			u64 flags, struct perf_raw_record *raw)
336 337
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
338
	struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
339
	unsigned int cpu = smp_processor_id();
340
	u64 index = flags & BPF_F_INDEX_MASK;
341
	struct bpf_event_entry *ee;
342 343
	struct perf_event *event;

344
	if (index == BPF_F_CURRENT_CPU)
345
		index = cpu;
346 347 348
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

349
	ee = READ_ONCE(array->ptrs[index]);
350
	if (!ee)
351 352
		return -ENOENT;

353
	event = ee->event;
354 355 356 357
	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
		return -EINVAL;

358
	if (unlikely(event->oncpu != cpu))
359 360
		return -EOPNOTSUPP;

361 362 363
	perf_sample_data_init(sd, 0, 0);
	sd->raw = raw;
	perf_event_output(event, sd, regs);
364 365 366
	return 0;
}

367 368
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
369 370 371 372 373 374 375 376 377 378 379 380 381 382
{
	struct perf_raw_record raw = {
		.frag = {
			.size = size,
			.data = data,
		},
	};

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	return __bpf_perf_event_output(regs, map, flags, &raw);
}

383 384
static const struct bpf_func_proto bpf_perf_event_output_proto = {
	.func		= bpf_perf_event_output,
385
	.gpl_only	= true,
386 387 388 389
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
390 391
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE,
392 393
};

394 395
static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);

396 397
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
398 399
{
	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
400 401 402 403 404 405 406
	struct perf_raw_frag frag = {
		.copy		= ctx_copy,
		.size		= ctx_size,
		.data		= ctx,
	};
	struct perf_raw_record raw = {
		.frag = {
407 408 409
			{
				.next	= ctx_size ? &frag : NULL,
			},
410 411 412 413
			.size	= meta_size,
			.data	= meta,
		},
	};
414 415 416

	perf_fetch_caller_regs(regs);

417
	return __bpf_perf_event_output(regs, map, flags, &raw);
418 419
}

420
BPF_CALL_0(bpf_get_current_task)
421 422 423 424 425 426 427 428 429 430
{
	return (long) current;
}

/* Verifier description of bpf_get_current_task(); it takes no arguments. */
static const struct bpf_func_proto bpf_get_current_task_proto = {
	.func     = bpf_get_current_task,
	.gpl_only = true,
	.ret_type = RET_INTEGER,
};

431
BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct cgroup *cgrp;

	if (unlikely(in_interrupt()))
		return -EINVAL;
	if (unlikely(idx >= array->map.max_entries))
		return -E2BIG;

	cgrp = READ_ONCE(array->ptrs[idx]);
	if (unlikely(!cgrp))
		return -EAGAIN;

	return task_under_cgroup_hierarchy(current, cgrp);
}

/* Verifier description of bpf_current_task_under_cgroup(). */
static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
	.func		= bpf_current_task_under_cgroup,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,	/* map */
	.arg2_type	= ARG_ANYTHING,		/* idx */
};

456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	int ret;

	/*
	 * The strncpy_from_unsafe() call will likely not fill the entire
	 * buffer, but that's okay in this circumstance as we're probing
	 * arbitrary memory anyway similar to bpf_probe_read() and might
	 * as well probe the stack. Thus, memory is explicitly cleared
	 * only in error case, so that improper users ignoring return
	 * code altogether don't copy garbage; otherwise length of string
	 * is returned that can be used for bpf_perf_event_output() et al.
	 */
	ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
}

/* Verifier description of the bpf_probe_read_str() helper. */
static const struct bpf_func_proto bpf_probe_read_str_proto = {
	.func      = bpf_probe_read_str,
	.gpl_only  = true,
	.ret_type  = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_UNINIT_MEM,	/* dst */
	.arg2_type = ARG_CONST_SIZE,		/* size */
	.arg3_type = ARG_ANYTHING,		/* unsafe_ptr */
};

486
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
487 488 489 490 491 492 493 494 495 496
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_probe_read:
		return &bpf_probe_read_proto;
497 498
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
499 500
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
501 502
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
503 504
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
505 506 507 508
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
509
	case BPF_FUNC_trace_printk:
510
		return bpf_get_trace_printk_proto();
511 512
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
513 514
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
515 516
	case BPF_FUNC_perf_event_read:
		return &bpf_perf_event_read_proto;
517 518
	case BPF_FUNC_probe_write_user:
		return bpf_get_probe_write_proto();
519 520
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
521 522
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
523 524
	case BPF_FUNC_probe_read_str:
		return &bpf_probe_read_str_proto;
525 526 527 528 529 530 531 532
	default:
		return NULL;
	}
}

static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
533 534
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto;
535 536
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto;
537 538
	case BPF_FUNC_perf_event_read_value:
		return &bpf_perf_event_read_value_proto;
539
	default:
540
		return tracing_func_proto(func_id);
541 542 543 544
	}
}

/* bpf+kprobe programs can access fields of 'struct pt_regs' */
545
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
546
					struct bpf_insn_access_aux *info)
547 548 549 550 551 552 553
{
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
554 555 556 557 558 559 560
	/*
	 * Assertion for 32 bit to make sure last 8 byte access
	 * (BPF_DW) to the last 4 byte member is disallowed.
	 */
	if (off + size > sizeof(struct pt_regs))
		return false;

561 562 563
	return true;
}

564
const struct bpf_verifier_ops kprobe_verifier_ops = {
565 566 567 568
	.get_func_proto  = kprobe_prog_func_proto,
	.is_valid_access = kprobe_prog_is_valid_access,
};

569 570 571
const struct bpf_prog_ops kprobe_prog_ops = {
};

572 573
BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
574
{
575 576
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

577 578 579
	/*
	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
580
	 * from there and call the same bpf_perf_event_output() helper inline.
581
	 */
582
	return ____bpf_perf_event_output(regs, map, flags, data, size);
583 584 585 586 587 588 589 590 591
}

static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
	.func		= bpf_perf_event_output_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
592 593
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE,
594 595
};

596 597
BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags)
598
{
599
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
600

601 602 603 604 605 606 607
	/*
	 * Same comment as in bpf_perf_event_output_tp(), only that this time
	 * the other helper's function body cannot be inlined due to being
	 * external, thus we need to call raw helper function.
	 */
	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			       flags, 0, 0);
608 609 610 611 612 613 614 615 616 617 618
}

/* Verifier description of the bpf_get_stackid_tp() helper. */
static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func      = bpf_get_stackid_tp,
	.gpl_only  = true,
	.ret_type  = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,		/* tp_buff */
	.arg2_type = ARG_CONST_MAP_PTR,		/* map */
	.arg3_type = ARG_ANYTHING,		/* flags */
};

619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644
BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
				    &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

/* Verifier description of the bpf_perf_prog_read_value_tp() helper. */
static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = {
	.func		= bpf_perf_prog_read_value_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,		/* ctx */
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,	/* buf */
	.arg3_type	= ARG_CONST_SIZE,		/* size */
};

645 646 647 648
static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
649
		return &bpf_perf_event_output_proto_tp;
650
	case BPF_FUNC_get_stackid:
651
		return &bpf_get_stackid_proto_tp;
652 653
	case BPF_FUNC_perf_prog_read_value:
		return &bpf_perf_prog_read_value_proto_tp;
654 655 656 657 658
	default:
		return tracing_func_proto(func_id);
	}
}

659
static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
660
				    struct bpf_insn_access_aux *info)
661 662 663 664 665 666 667
{
	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
668 669

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
670 671 672
	return true;
}

673
const struct bpf_verifier_ops tracepoint_verifier_ops = {
674 675 676 677
	.get_func_proto  = tp_prog_func_proto,
	.is_valid_access = tp_prog_is_valid_access,
};

678 679 680
const struct bpf_prog_ops tracepoint_prog_ops = {
};

681
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
682
				    struct bpf_insn_access_aux *info)
683
{
684 685
	const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
					 sample_period);
686

687 688 689 690 691 692
	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
693

694 695 696 697
	switch (off) {
	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
		bpf_ctx_record_field_size(info, size_sp);
		if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
698
			return false;
699 700
		break;
	default:
701 702 703
		if (size != sizeof(long))
			return false;
	}
704

705 706 707
	return true;
}

708 709
static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
				      const struct bpf_insn *si,
710
				      struct bpf_insn *insn_buf,
711
				      struct bpf_prog *prog, u32 *target_size)
712 713 714
{
	struct bpf_insn *insn = insn_buf;

715
	switch (si->off) {
716
	case offsetof(struct bpf_perf_event_data, sample_period):
717
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
718
						       data), si->dst_reg, si->src_reg,
719
				      offsetof(struct bpf_perf_event_data_kern, data));
720
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
721 722
				      bpf_target_off(struct perf_sample_data, period, 8,
						     target_size));
723 724
		break;
	default:
725
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
726
						       regs), si->dst_reg, si->src_reg,
727
				      offsetof(struct bpf_perf_event_data_kern, regs));
728 729
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
				      si->off);
730 731 732 733 734 735
		break;
	}

	return insn - insn_buf;
}

736
const struct bpf_verifier_ops perf_event_verifier_ops = {
737 738 739 740
	.get_func_proto		= tp_prog_func_proto,
	.is_valid_access	= pe_prog_is_valid_access,
	.convert_ctx_access	= pe_prog_convert_ctx_access,
};
741 742 743

/* No callbacks are populated here for perf_event programs. */
const struct bpf_prog_ops perf_event_prog_ops = {};