/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#ifndef _LINUX_BPF_H
#define _LINUX_BPF_H 1

#include <uapi/linux/bpf.h>

#include <linux/workqueue.h>
#include <linux/file.h>
#include <linux/percpu.h>
#include <linux/err.h>
#include <linux/rbtree_latch.h>
#include <linux/numa.h>

struct perf_event;
struct bpf_prog;
struct bpf_map;

/* map is generic key/value storage optionally accessible by eBPF programs */
struct bpf_map_ops {
	/* funcs callable from userspace (via syscall) */
	struct bpf_map *(*map_alloc)(union bpf_attr *attr);
	void (*map_release)(struct bpf_map *map, struct file *map_file);
	void (*map_free)(struct bpf_map *map);
	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);

	/* funcs callable from userspace and from eBPF programs */
	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
	int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
	int (*map_delete_elem)(struct bpf_map *map, void *key);

	/* funcs called by prog_array and perf_event_array map */
	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
				int fd);
	void (*map_fd_put_ptr)(void *ptr);
	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
	u32 (*map_fd_sys_lookup_elem)(void *ptr);
};
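
/* Illustrative sketch, not part of this header: a map implementation
 * supplies a bpf_map_ops instance and wires it up through the
 * BPF_MAP_TYPE() entries in linux/bpf_types.h. All 'my_*' names below
 * are hypothetical.
 *
 *	const struct bpf_map_ops my_map_ops = {
 *		.map_alloc = my_map_alloc,
 *		.map_free = my_map_free,
 *		.map_get_next_key = my_map_get_next_key,
 *		.map_lookup_elem = my_map_lookup_elem,
 *		.map_update_elem = my_map_update_elem,
 *		.map_delete_elem = my_map_delete_elem,
 *	};
 */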

struct bpf_map {
	atomic_t refcnt;
	enum bpf_map_type map_type;
	u32 key_size;
	u32 value_size;
	u32 max_entries;
	u32 map_flags;
	u32 pages;
	u32 id;
	int numa_node;
	struct user_struct *user;
	const struct bpf_map_ops *ops;
	struct work_struct work;
	atomic_t usercnt;
	struct bpf_map *inner_map_meta;
	u8 name[BPF_OBJ_NAME_LEN];
};

/* function argument constraints */
enum bpf_arg_type {
	ARG_DONTCARE = 0,	/* unused argument in helper function */

	/* the following constraints are used to prototype
	 * bpf_map_lookup/update/delete_elem() functions
	 */
	ARG_CONST_MAP_PTR,	/* const argument used as pointer to bpf_map */
	ARG_PTR_TO_MAP_KEY,	/* pointer to stack used as map key */
	ARG_PTR_TO_MAP_VALUE,	/* pointer to stack used as map value */

	/* the following constraints are used to prototype bpf_memcmp() and other
	 * functions that access data on eBPF program stack
	 */
	ARG_PTR_TO_MEM,		/* pointer to valid memory (stack, packet, map value) */
	ARG_PTR_TO_UNINIT_MEM,	/* pointer to memory does not need to be initialized,
				 * helper function must fill all bytes or clear
				 * them in error case.
				 */

	ARG_CONST_SIZE,		/* number of bytes accessed from memory */
	ARG_CONST_SIZE_OR_ZERO,	/* number of bytes accessed from memory or 0 */

	ARG_PTR_TO_CTX,		/* pointer to context */
	ARG_ANYTHING,		/* any (initialized) argument is ok */
};

/* type of values returned from helper functions */
enum bpf_return_type {
	RET_INTEGER,			/* function returns integer */
	RET_VOID,			/* function doesn't return anything */
	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
};

/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
 * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
 * instructions after verifying
 */
struct bpf_func_proto {
	u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
	bool gpl_only;
	bool pkt_access;
	enum bpf_return_type ret_type;
	enum bpf_arg_type arg1_type;
	enum bpf_arg_type arg2_type;
	enum bpf_arg_type arg3_type;
	enum bpf_arg_type arg4_type;
	enum bpf_arg_type arg5_type;
};
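
/* Illustrative sketch (all 'my_*' names are hypothetical): a helper that
 * fills a caller-provided buffer could be described to the verifier as:
 *
 *	const struct bpf_func_proto my_helper_proto = {
 *		.func		= my_helper,
 *		.gpl_only	= false,
 *		.ret_type	= RET_INTEGER,
 *		.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
 *		.arg2_type	= ARG_CONST_SIZE,
 *	};
 *
 * The verifier then checks each argument register of a BPF_CALL against
 * these constraints, e.g. that arg1 points to memory of at least the
 * size given in arg2.
 */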

/* bpf_context is intentionally undefined structure. Pointer to bpf_context is
 * the first argument to eBPF programs.
 * For socket filters: 'struct bpf_context *' == 'struct sk_buff *'
 */
struct bpf_context;

enum bpf_access_type {
	BPF_READ = 1,
	BPF_WRITE = 2
};

/* types of values stored in eBPF registers */
/* Pointer types represent:
 * pointer
 * pointer + imm
 * pointer + (u16) var
 * pointer + (u16) var + imm
 * if (range > 0) then [ptr, ptr + range - off) is safe to access
 * if (id > 0) means that some 'var' was added
 * if (off > 0) means that 'imm' was added
 */
enum bpf_reg_type {
	NOT_INIT = 0,		 /* nothing was written into register */
	SCALAR_VALUE,		 /* reg doesn't contain a valid pointer */
	PTR_TO_CTX,		 /* reg points to bpf_context */
	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
	PTR_TO_STACK,		 /* reg == frame_pointer + offset */
	PTR_TO_PACKET_META,	 /* skb->data - meta_len */
	PTR_TO_PACKET,		 /* reg points to skb->data */
	PTR_TO_PACKET_END,	 /* skb->data + headlen */
};

/* The information passed from prog-specific *_is_valid_access
 * back to the verifier.
 */
struct bpf_insn_access_aux {
	enum bpf_reg_type reg_type;
	int ctx_field_size;
};

static inline void
bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
{
	aux->ctx_field_size = size;
}

struct bpf_verifier_ops {
	/* return eBPF function prototype for verification */
	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);

	/* return true if 'size' wide access at offset 'off' within bpf_context
	 * with 'type' (read or write) is allowed
	 */
	bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
				struct bpf_insn_access_aux *info);
	int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
			    const struct bpf_prog *prog);
	u32 (*convert_ctx_access)(enum bpf_access_type type,
				  const struct bpf_insn *src,
				  struct bpf_insn *dst,
				  struct bpf_prog *prog, u32 *target_size);
	int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
			union bpf_attr __user *uattr);
};

struct bpf_prog_aux {
	atomic_t refcnt;
	u32 used_map_cnt;
	u32 max_ctx_offset;
	u32 stack_depth;
	u32 id;
	struct latch_tree_node ksym_tnode;
	struct list_head ksym_lnode;
	const struct bpf_verifier_ops *ops;
	struct bpf_map **used_maps;
	struct bpf_prog *prog;
	struct user_struct *user;
	u64 load_time; /* ns since boottime */
	u8 name[BPF_OBJ_NAME_LEN];
	union {
		struct work_struct work;
		struct rcu_head	rcu;
	};
};

struct bpf_array {
	struct bpf_map map;
	u32 elem_size;
	/* 'ownership' of prog_array is claimed by the first program that
	 * is going to use this map or by the first program whose FD is stored
	 * in the map to make sure that all callers and callees have the same
	 * prog_type and JITed flag
	 */
	enum bpf_prog_type owner_prog_type;
	bool owner_jited;
	union {
		char value[0] __aligned(8);
		void *ptrs[0] __aligned(8);
		void __percpu *pptrs[0] __aligned(8);
	};
};

#define MAX_TAIL_CALL_CNT 32

struct bpf_event_entry {
	struct perf_event *event;
	struct file *perf_file;
	struct file *map_file;
	struct rcu_head rcu;
};

u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
int bpf_prog_calc_tag(struct bpf_prog *fp);

const struct bpf_func_proto *bpf_get_trace_printk_proto(void);

typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
					unsigned long off, unsigned long len);

u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);

int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
			  union bpf_attr __user *uattr);
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
			  union bpf_attr __user *uattr);

/* an array of programs to be executed under rcu_lock.
 *
 * Typical usage:
 * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN);
 *
 * the structure returned by bpf_prog_array_alloc() should be populated
 * with program pointers and the last pointer must be NULL.
 * The user has to keep refcnt on the program and make sure the program
 * is removed from the array before bpf_prog_put().
 * The 'struct bpf_prog_array *' should only be replaced with xchg()
 * since other cpus are walking the array of pointers in parallel.
 */
struct bpf_prog_array {
	struct rcu_head rcu;
	struct bpf_prog *progs[0];
};

struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);

#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
	({						\
		struct bpf_prog **_prog;		\
		u32 _ret = 1;				\
		rcu_read_lock();			\
		_prog = rcu_dereference(array)->progs;	\
		for (; *_prog; _prog++)			\
			_ret &= func(*_prog, ctx);	\
		rcu_read_unlock();			\
		_ret;					\
	 })
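
/* Illustrative usage sketch (error handling omitted; population of the
 * array is elided since it is subsystem-specific): because _ret
 * AND-accumulates the return values, a single program returning 0 makes
 * the whole run return 0.
 *
 *	struct bpf_prog_array __rcu *arr;
 *	u32 ret;
 *
 *	arr = bpf_prog_array_alloc(1, GFP_KERNEL);
 *	... store held prog pointers into the array, last slot NULL ...
 *	ret = BPF_PROG_RUN_ARRAY(arr, ctx, BPF_PROG_RUN);
 */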

#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);

#define BPF_PROG_TYPE(_id, _ops) \
	extern const struct bpf_verifier_ops _ops;
#define BPF_MAP_TYPE(_id, _ops) \
	extern const struct bpf_map_ops _ops;
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE

struct bpf_prog *bpf_prog_get(u32 ufd);
struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
void bpf_prog_sub(struct bpf_prog *prog, int i);
struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog);
struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
int __bpf_prog_charge(struct user_struct *user, u32 pages);
void __bpf_prog_uncharge(struct user_struct *user, u32 pages);

struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
int bpf_map_precharge_memlock(u32 pages);
void *bpf_map_area_alloc(size_t size, int numa_node);
void bpf_map_area_free(void *base);

extern int sysctl_unprivileged_bpf_disabled;

int bpf_map_new_fd(struct bpf_map *map);
int bpf_prog_new_fd(struct bpf_prog *prog);

int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
int bpf_obj_get_user(const char __user *pathname);

int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
			   u64 flags);
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 flags);

int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);

int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags);
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
void bpf_fd_array_map_clear(struct bpf_map *map);
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
				void *key, void *value, u64 map_flags);
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);

/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
 * forced to use 'long' read/writes to try to atomically copy long counters.
 * Best-effort only.  No barriers here, since it _will_ race with concurrent
 * updates from BPF programs. Called from bpf syscall and mostly used with
 * size 8 or 16 bytes, so ask compiler to inline it.
 */
static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
{
	const long *lsrc = src;
	long *ldst = dst;

	size /= sizeof(long);
	while (size--)
		*ldst++ = *lsrc++;
}
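
/* Illustrative sketch (hypothetical variables): snapshotting one 8-byte,
 * 8-byte-aligned per-cpu counter with the helper above:
 *
 *	long snapshot;
 *
 *	bpf_long_memcpy(&snapshot, per_cpu_ptr(pptr, cpu), sizeof(long));
 */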

/* verify correctness of eBPF program */
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);

/* Map specifics */
struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);

/* Return map's numa specified by userspace */
static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
{
	return (attr->map_flags & BPF_F_NUMA_NODE) ?
		attr->numa_node : NUMA_NO_NODE;
}
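
/* Illustrative sketch: a map_alloc implementation would typically feed the
 * result into bpf_map_area_alloc() declared in this header ('size' is a
 * hypothetical variable):
 *
 *	int numa_node = bpf_map_attr_numa_node(attr);
 *	void *area = bpf_map_area_alloc(size, numa_node);
 */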

#else
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return ERR_PTR(-EOPNOTSUPP);
}

static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
						 enum bpf_prog_type type)
{
	return ERR_PTR(-EOPNOTSUPP);
}
static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog,
							  int i)
{
	return ERR_PTR(-EOPNOTSUPP);
}

static inline void bpf_prog_sub(struct bpf_prog *prog, int i)
{
}

static inline void bpf_prog_put(struct bpf_prog *prog)
{
}

static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog)
{
	return ERR_PTR(-EOPNOTSUPP);
}

static inline struct bpf_prog *__must_check
bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	return ERR_PTR(-EOPNOTSUPP);
}

static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	return 0;
}

static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
}

static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
						       u32 key)
{
	return NULL;
}

static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index)
{
}

static inline void __dev_map_flush(struct bpf_map *map)
{
}
#endif /* CONFIG_BPF_SYSCALL */

#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL)
struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
#else
static inline struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
{
	return NULL;
}

static inline int sock_map_prog(struct bpf_map *map,
				struct bpf_prog *prog,
				u32 type)
{
	return -EOPNOTSUPP;
}
#endif

/* verifier prototypes for helper functions called from eBPF programs */
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
extern const struct bpf_func_proto bpf_map_delete_elem_proto;

extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
extern const struct bpf_func_proto bpf_get_numa_node_id_proto;
extern const struct bpf_func_proto bpf_tail_call_proto;
extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;

/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

#endif /* _LINUX_BPF_H */