/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#ifndef _UAPI__LINUX_BPF_H__
#define _UAPI__LINUX_BPF_H__

#include <linux/types.h>
#include <linux/bpf_common.h>

/* Extended instruction set based on top of classic BPF */

/* instruction classes */
#define BPF_ALU64	0x07	/* alu mode in double word width */

/* ld/ldx fields */
#define BPF_DW		0x18	/* double word */
#define BPF_XADD	0xc0	/* exclusive add */

/* alu/jmp fields */
#define BPF_MOV		0xb0	/* mov reg to reg */
#define BPF_ARSH	0xc0	/* sign extending arithmetic shift right */

/* change endianness of a register */
#define BPF_END		0xd0	/* flags for endianness conversion: */
#define BPF_TO_LE	0x00	/* convert to little-endian */
#define BPF_TO_BE	0x08	/* convert to big-endian */
#define BPF_FROM_LE	BPF_TO_LE
#define BPF_FROM_BE	BPF_TO_BE

#define BPF_JNE		0x50	/* jump != */
#define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
#define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
#define BPF_CALL	0x80	/* function call */
#define BPF_EXIT	0x90	/* function return */

/* Register numbers */
enum {
	BPF_REG_0 = 0,	/* explicit 0 so the sequence below is unambiguous */
	BPF_REG_1,
	BPF_REG_2,
	BPF_REG_3,
	BPF_REG_4,
	BPF_REG_5,
	BPF_REG_6,
	BPF_REG_7,
	BPF_REG_8,
	BPF_REG_9,
	BPF_REG_10,
	__MAX_BPF_REG,	/* sentinel: count of registers, not a real register */
};

/* BPF has 10 general purpose 64-bit registers and stack frame. */
#define MAX_BPF_REG	__MAX_BPF_REG

/* A single eBPF instruction: fixed 8-byte encoding
 * (1-byte opcode, two 4-bit register fields, 16-bit offset, 32-bit immediate).
 */
struct bpf_insn {
	__u8	code;		/* opcode */
	__u8	dst_reg:4;	/* dest register */
	__u8	src_reg:4;	/* source register */
	__s16	off;		/* signed offset */
	__s32	imm;		/* signed immediate constant */
};

/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
	BPF_MAP_CREATE,
	BPF_MAP_LOOKUP_ELEM,
	BPF_MAP_UPDATE_ELEM,
	BPF_MAP_DELETE_ELEM,
	BPF_MAP_GET_NEXT_KEY,
	BPF_PROG_LOAD,
	BPF_OBJ_PIN,
	BPF_OBJ_GET,
};

/* Map backends available to BPF_MAP_CREATE. */
enum bpf_map_type {
	BPF_MAP_TYPE_UNSPEC,
	BPF_MAP_TYPE_HASH,
	BPF_MAP_TYPE_ARRAY,
	BPF_MAP_TYPE_PROG_ARRAY,
	BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	BPF_MAP_TYPE_PERCPU_HASH,
	BPF_MAP_TYPE_PERCPU_ARRAY,
	BPF_MAP_TYPE_STACK_TRACE,
	BPF_MAP_TYPE_CGROUP_ARRAY,
};

/* Program types selectable via BPF_PROG_LOAD (attr.prog_type). */
enum bpf_prog_type {
	BPF_PROG_TYPE_UNSPEC,
	BPF_PROG_TYPE_SOCKET_FILTER,
	BPF_PROG_TYPE_KPROBE,
	BPF_PROG_TYPE_SCHED_CLS,
	BPF_PROG_TYPE_SCHED_ACT,
	BPF_PROG_TYPE_TRACEPOINT,
	BPF_PROG_TYPE_XDP,
};

#define BPF_PSEUDO_MAP_FD	1

/* flags for BPF_MAP_UPDATE_ELEM command */
#define BPF_ANY		0 /* create new element or update existing */
#define BPF_NOEXIST	1 /* create new element if it didn't exist */
#define BPF_EXIST	2 /* update existing element */

#define BPF_F_NO_PREALLOC	(1U << 0)
109 110 111 112 113 114
union bpf_attr {
	struct { /* anonymous struct used by BPF_MAP_CREATE command */
		__u32	map_type;	/* one of enum bpf_map_type */
		__u32	key_size;	/* size of key in bytes */
		__u32	value_size;	/* size of value in bytes */
		__u32	max_entries;	/* max number of entries in a map */
115
		__u32	map_flags;	/* prealloc or not */
116
	};
117 118 119 120 121 122 123 124

	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
		__u32		map_fd;
		__aligned_u64	key;
		union {
			__aligned_u64 value;
			__aligned_u64 next_key;
		};
125
		__u64		flags;
126
	};
127 128 129 130 131 132

	struct { /* anonymous struct used by BPF_PROG_LOAD command */
		__u32		prog_type;	/* one of enum bpf_prog_type */
		__u32		insn_cnt;
		__aligned_u64	insns;
		__aligned_u64	license;
133 134 135
		__u32		log_level;	/* verbosity level of verifier */
		__u32		log_size;	/* size of user buffer */
		__aligned_u64	log_buf;	/* user supplied buffer */
136
		__u32		kern_version;	/* checked when prog_type=kprobe */
137
	};
138 139 140 141 142

	struct { /* anonymous struct used by BPF_OBJ_* commands */
		__aligned_u64	pathname;
		__u32		bpf_fd;
	};
143 144
} __attribute__((aligned(8)));

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
 */
enum bpf_func_id {
	BPF_FUNC_unspec,
	BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
	BPF_FUNC_probe_read,      /* int bpf_probe_read(void *dst, int size, void *src) */
	BPF_FUNC_ktime_get_ns,    /* u64 bpf_ktime_get_ns(void) */
	BPF_FUNC_trace_printk,    /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */
	BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
	BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */

	/**
	 * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet
	 * @skb: pointer to skb
	 * @offset: offset within packet from skb->mac_header
	 * @from: pointer where to copy bytes from
	 * @len: number of bytes to store into packet
	 * @flags: bit 0 - if true, recompute skb->csum
	 *         other bits - reserved
	 * Return: 0 on success
	 */
	BPF_FUNC_skb_store_bytes,

	/**
	 * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum
	 * @skb: pointer to skb
	 * @offset: offset within packet where IP checksum is located
	 * @from: old value of header field
	 * @to: new value of header field
	 * @flags: bits 0-3 - size of header field
	 *         other bits - reserved
	 * Return: 0 on success
	 */
	BPF_FUNC_l3_csum_replace,

	/**
	 * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum
	 * @skb: pointer to skb
	 * @offset: offset within packet where TCP/UDP checksum is located
	 * @from: old value of header field
	 * @to: new value of header field
	 * @flags: bits 0-3 - size of header field
	 *         bit 4 - is pseudo header
	 *         other bits - reserved
	 * Return: 0 on success
	 */
	BPF_FUNC_l4_csum_replace,

	/**
	 * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program
	 * @ctx: context pointer passed to next program
	 * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
	 * @index: index inside array that selects specific program to run
	 * Return: 0 on success
	 */
	BPF_FUNC_tail_call,

	/**
	 * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev
	 * @skb: pointer to skb
	 * @ifindex: ifindex of the net device
	 * @flags: bit 0 - if set, redirect to ingress instead of egress
	 *         other bits - reserved
	 * Return: 0 on success
	 */
	BPF_FUNC_clone_redirect,

	/**
	 * u64 bpf_get_current_pid_tgid(void)
	 * Return: current->tgid << 32 | current->pid
	 */
	BPF_FUNC_get_current_pid_tgid,

	/**
	 * u64 bpf_get_current_uid_gid(void)
	 * Return: current_gid << 32 | current_uid
	 */
	BPF_FUNC_get_current_uid_gid,

	/**
	 * bpf_get_current_comm(char *buf, int size_of_buf)
	 * stores current->comm into buf
	 * Return: 0 on success
	 */
	BPF_FUNC_get_current_comm,

	/**
	 * bpf_get_cgroup_classid(skb) - retrieve a proc's classid
	 * @skb: pointer to skb
	 * Return: classid if != 0
	 */
	BPF_FUNC_get_cgroup_classid,
	BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */
	BPF_FUNC_skb_vlan_pop,  /* bpf_skb_vlan_pop(skb) */

	/**
	 * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags)
	 * retrieve or populate tunnel metadata
	 * @skb: pointer to skb
	 * @key: pointer to 'struct bpf_tunnel_key'
	 * @size: size of 'struct bpf_tunnel_key'
	 * @flags: room for future extensions
	 * Return: 0 on success
	 */
	BPF_FUNC_skb_get_tunnel_key,
	BPF_FUNC_skb_set_tunnel_key,
	BPF_FUNC_perf_event_read,	/* u64 bpf_perf_event_read(&map, index) */
	/**
	 * bpf_redirect(ifindex, flags) - redirect to another netdev
	 * @ifindex: ifindex of the net device
	 * @flags: bit 0 - if set, redirect to ingress instead of egress
	 *         other bits - reserved
	 * Return: TC_ACT_REDIRECT
	 */
	BPF_FUNC_redirect,

	/**
	 * bpf_get_route_realm(skb) - retrieve a dst's tclassid
	 * @skb: pointer to skb
	 * Return: realm if != 0
	 */
	BPF_FUNC_get_route_realm,

	/**
	 * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample
	 * @ctx: struct pt_regs*
	 * @map: pointer to perf_event_array map
	 * @index: index of event in the map
	 * @data: data on stack to be output as raw data
	 * @size: size of data
	 * Return: 0 on success
	 */
	BPF_FUNC_perf_event_output,
	BPF_FUNC_skb_load_bytes,

	/**
	 * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id
	 * @ctx: struct pt_regs*
	 * @map: pointer to stack_trace map
	 * @flags: bits 0-7 - number of stack frames to skip
	 *         bit 8 - collect user stack instead of kernel
	 *         bit 9 - compare stacks by hash only
	 *         bit 10 - if two different stacks hash into the same stackid
	 *                  discard old
	 *         other bits - reserved
	 * Return: >= 0 stackid on success or negative error
	 */
	BPF_FUNC_get_stackid,

	/**
	 * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff
	 * @from: raw from buffer
	 * @from_size: length of from buffer
	 * @to: raw to buffer
	 * @to_size: length of to buffer
	 * @seed: optional seed
	 * Return: csum result
	 */
	BPF_FUNC_csum_diff,

	/**
	 * bpf_skb_[gs]et_tunnel_opt(skb, opt, size)
	 * retrieve or populate tunnel options metadata
	 * @skb: pointer to skb
	 * @opt: pointer to raw tunnel option data
	 * @size: size of @opt
	 * Return: 0 on success for set, option size for get
	 */
	BPF_FUNC_skb_get_tunnel_opt,
	BPF_FUNC_skb_set_tunnel_opt,

	/**
	 * bpf_skb_change_proto(skb, proto, flags)
	 * Change protocol of the skb. Currently supported is
	 * v4 -> v6, v6 -> v4 transitions. The helper will also
	 * resize the skb. eBPF program is expected to fill the
	 * new headers via skb_store_bytes and lX_csum_replace.
	 * @skb: pointer to skb
	 * @proto: new skb->protocol type
	 * @flags: reserved
	 * Return: 0 on success or negative error
	 */
	BPF_FUNC_skb_change_proto,

	/**
	 * bpf_skb_change_type(skb, type)
	 * Change packet type of skb.
	 * @skb: pointer to skb
	 * @type: new skb->pkt_type type
	 * Return: 0 on success or negative error
	 */
	BPF_FUNC_skb_change_type,

	/**
	 * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
	 * @skb: pointer to skb
	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
	 * @index: index of the cgroup in the bpf_map
	 * Return:
	 *   == 0 skb failed the cgroup2 descendant test
	 *   == 1 skb succeeded the cgroup2 descendant test
	 *    < 0 error
	 */
	BPF_FUNC_skb_under_cgroup,

	/**
	 * bpf_get_hash_recalc(skb)
	 * Retrieve and possibly recalculate skb->hash.
	 * @skb: pointer to skb
	 * Return: hash
	 */
	BPF_FUNC_get_hash_recalc,

	/**
	 * u64 bpf_get_current_task(void)
	 * Returns current task_struct
	 * Return: current
	 */
	BPF_FUNC_get_current_task,

	/**
	 * bpf_probe_write_user(void *dst, void *src, int len)
	 * safely attempt to write to a location
	 * @dst: destination address in userspace
	 * @src: source address on stack
	 * @len: number of bytes to copy
	 * Return: 0 on success or negative error
	 */
	BPF_FUNC_probe_write_user,

	/**
	 * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
	 * @index: index of the cgroup in the bpf_map
	 * Return:
	 *   == 0 current failed the cgroup2 descendant test
	 *   == 1 current succeeded the cgroup2 descendant test
	 *    < 0 error
	 */
	BPF_FUNC_current_task_under_cgroup,

	/**
	 * bpf_skb_change_tail(skb, len, flags)
	 * The helper will resize the skb to the given new size,
	 * to be used f.e. with control messages.
	 * @skb: pointer to skb
	 * @len: new skb length
	 * @flags: reserved
	 * Return: 0 on success or negative error
	 */
	BPF_FUNC_skb_change_tail,

	__BPF_FUNC_MAX_ID,
};

/* All flags used by eBPF helper functions, placed here. */

/* BPF_FUNC_skb_store_bytes flags. */
#define BPF_F_RECOMPUTE_CSUM		(1ULL << 0)
#define BPF_F_INVALIDATE_HASH		(1ULL << 1)

/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
 * First 4 bits are for passing the header field size.
 */
#define BPF_F_HDR_FIELD_MASK		0xfULL

/* BPF_FUNC_l4_csum_replace flags. */
#define BPF_F_PSEUDO_HDR		(1ULL << 4)
#define BPF_F_MARK_MANGLED_0		(1ULL << 5)

/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
#define BPF_F_INGRESS			(1ULL << 0)

/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
#define BPF_F_TUNINFO_IPV6		(1ULL << 0)

/* BPF_FUNC_get_stackid flags. */
#define BPF_F_SKIP_FIELD_MASK		0xffULL
#define BPF_F_USER_STACK		(1ULL << 8)
#define BPF_F_FAST_STACK_CMP		(1ULL << 9)
#define BPF_F_REUSE_STACKID		(1ULL << 10)

/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
#define BPF_F_DONT_FRAGMENT		(1ULL << 2)

/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
#define BPF_F_INDEX_MASK		0xffffffffULL
#define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
/* user accessible mirror of in-kernel sk_buff.
 * new fields can only be added to the end of this structure
 */
struct __sk_buff {
	__u32 len;
	__u32 pkt_type;
	__u32 mark;
	__u32 queue_mapping;
	__u32 protocol;
	__u32 vlan_present;
	__u32 vlan_tci;
	__u32 vlan_proto;
	__u32 priority;
	__u32 ingress_ifindex;
	__u32 ifindex;
	__u32 tc_index;
	__u32 cb[5];
	__u32 hash;
	__u32 tc_classid;
	__u32 data;
	__u32 data_end;
};

463 464
struct bpf_tunnel_key {
	__u32 tunnel_id;
465 466 467 468 469 470
	union {
		__u32 remote_ipv4;
		__u32 remote_ipv6[4];
	};
	__u8 tunnel_tos;
	__u8 tunnel_ttl;
471
	__u16 tunnel_ext;
472
	__u32 tunnel_label;
473 474
};

/* User return codes for XDP prog type.
 * A valid XDP program must return one of these defined values. All other
 * return codes are reserved for future use. Unknown return codes will result
 * in packet drop.
 */
enum xdp_action {
	XDP_ABORTED = 0,
	XDP_DROP,
	XDP_PASS,
	XDP_TX,
};

/* user accessible metadata for XDP packet hook
 * new fields must be added to the end of this structure
 */
struct xdp_md {
	__u32 data;	/* start of packet data (same pair as in struct __sk_buff) */
	__u32 data_end;	/* end of packet data */
};

#endif /* _UAPI__LINUX_BPF_H__ */