提交 1b80f86e 编写于 作者: D David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2018-04-21

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Initial work on BPF Type Format (BTF) is added, which is a meta
   data format which describes the data types of BPF programs / maps.
   BTF has its roots from CTF (Compact C-Type format) with a number
   of changes to it. First use case is to provide a generic pretty
   print capability for BPF maps inspection, later work will also
   add BTF to bpftool. pahole support to convert dwarf to BTF will
   be upstreamed as well (https://github.com/iamkafai/pahole/tree/btf),
   from Martin.

2) Add a new xdp_bpf_adjust_tail() BPF helper for XDP that allows
   for changing the data_end pointer. Only shrinking is currently
   supported which helps for crafting ICMP control messages. Minor
   changes in drivers have been added where needed so they recalc
   the packet's length also when data_end was adjusted, from Nikita.

3) Improve bpftool to make it easier to feed hex bytes via cmdline
   for map operations, from Quentin.

4) Add support for various missing BPF prog types and attach types
   that have been added to kernel recently but neither to bpftool
   nor libbpf yet. Doc and bash completion updates have been added
   as well for bpftool, from Andrey.

5) Proper fix for avoiding to leak info stored in frame data on page
   reuse for the two bpf_xdp_adjust_{head,meta} helpers by disallowing
   to move the pointers into struct xdp_frame area, from Jesper.

6) Follow-up compile fix from BTF in order to include stdbool.h in
   libbpf, from Björn.

7) Few fixes in BPF sample code, that is, a typo on the netdevice
   in a comment and fixup proper dump of XDP action code in the
   tracepoint exception, from Wang and Jesper.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -113,10 +113,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
if (tx_avail != bp->tx_ring_size)
*event &= ~BNXT_RX_EVENT;
*len = xdp.data_end - xdp.data;
if (orig_data != xdp.data) {
offset = xdp.data - xdp.data_hard_start;
*data_ptr = xdp.data_hard_start + offset;
*len = xdp.data_end - xdp.data;
}
switch (act) {
case XDP_PASS:
......
......@@ -538,9 +538,9 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
action = bpf_prog_run_xdp(prog, &xdp);
rcu_read_unlock();
len = xdp.data_end - xdp.data;
/* Check if XDP program has changed headers */
if (orig_data != xdp.data) {
len = xdp.data_end - xdp.data;
offset = orig_data - xdp.data;
dma_addr -= offset;
}
......
......@@ -775,8 +775,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
act = bpf_prog_run_xdp(xdp_prog, &xdp);
if (xdp.data != orig_data) {
length = xdp.data_end - xdp.data;
if (xdp.data != orig_data) {
frags[0].page_offset = xdp.data -
xdp.data_hard_start;
va = xdp.data;
......
......@@ -1722,7 +1722,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
act = bpf_prog_run_xdp(xdp_prog, &xdp);
pkt_len -= xdp.data - orig_data;
pkt_len = xdp.data_end - xdp.data;
pkt_off += xdp.data - orig_data;
switch (act) {
......
......@@ -1690,6 +1690,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
return NULL;
case XDP_PASS:
delta = orig_data - xdp.data;
len = xdp.data_end - xdp.data;
break;
default:
bpf_warn_invalid_xdp_action(act);
......@@ -1710,7 +1711,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
}
skb_reserve(skb, pad - delta);
skb_put(skb, len + delta);
skb_put(skb, len);
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
......
......@@ -606,6 +606,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
case XDP_PASS:
/* Recalculate length in case bpf program changed it */
delta = orig_data - xdp.data;
len = xdp.data_end - xdp.data;
break;
case XDP_TX:
xdpf = convert_to_xdp_frame(&xdp);
......@@ -642,7 +643,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
goto err;
}
skb_reserve(skb, headroom - delta);
skb_put(skb, len + delta);
skb_put(skb, len);
if (!delta) {
buf += header_offset;
memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
......@@ -757,6 +758,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
offset = xdp.data -
page_address(xdp_page) - vi->hdr_len;
/* recalculate len if xdp.data or xdp.data_end were
* adjusted
*/
len = xdp.data_end - xdp.data;
/* We can only create skb based on xdp_page. */
if (unlikely(xdp_page != page)) {
rcu_read_unlock();
......
......@@ -22,6 +22,8 @@ struct perf_event;
struct bpf_prog;
struct bpf_map;
struct sock;
struct seq_file;
struct btf;
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
......@@ -43,10 +45,14 @@ struct bpf_map_ops {
void (*map_fd_put_ptr)(void *ptr);
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
struct seq_file *m);
int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf,
u32 key_type_id, u32 value_type_id);
};
struct bpf_map {
/* 1st cacheline with read-mostly members of which some
/* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
*/
const struct bpf_map_ops *ops ____cacheline_aligned;
......@@ -62,10 +68,13 @@ struct bpf_map {
u32 pages;
u32 id;
int numa_node;
u32 btf_key_id;
u32 btf_value_id;
struct btf *btf;
bool unpriv_array;
/* 7 bytes hole */
/* 55 bytes hole */
/* 2nd cacheline with misc members to avoid false sharing
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
struct user_struct *user ____cacheline_aligned;
......@@ -100,6 +109,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
return container_of(map, struct bpf_offloaded_map, map);
}
static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
{
return map->ops->map_seq_show_elem && map->ops->map_check_btf;
}
extern const struct bpf_map_ops bpf_map_offload_ops;
/* function argument constraints */
......
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2018 Facebook */
#ifndef _LINUX_BTF_H
#define _LINUX_BTF_H 1
#include <linux/types.h>
struct btf;
struct btf_type;
union bpf_attr;
extern const struct file_operations btf_fops;
void btf_put(struct btf *btf);
int btf_new_fd(const union bpf_attr *attr);
struct btf *btf_get_by_fd(int fd);
int btf_get_info_by_fd(const struct btf *btf,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
/* Figure out the size of a type_id. If type_id is a modifier
* (e.g. const), it will be resolved to find out the type with size.
*
* For example:
* In describing "const void *", type_id is "const" and "const"
* refers to "void *". The return type will be "void *".
*
* If type_id is a simple "int", then return type will be "int".
*
* @btf: struct btf object
* @type_id: Find out the size of type_id. The type_id of the return
* type is set to *type_id.
* @ret_size: It can be NULL. If not NULL, the size of the return
* type is set to *ret_size.
* Return: The btf_type (resolved to another type with size info if needed).
* NULL is returned if type_id itself does not have size info
* (e.g. void) or it cannot be resolved to another type that
* has size info.
* *type_id and *ret_size will not be changed in the
* NULL return case.
*/
const struct btf_type *btf_type_id_size(const struct btf *btf,
u32 *type_id,
u32 *ret_size);
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
struct seq_file *m);
#endif
......@@ -95,6 +95,7 @@ enum bpf_cmd {
BPF_OBJ_GET_INFO_BY_FD,
BPF_PROG_QUERY,
BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD,
};
enum bpf_map_type {
......@@ -279,6 +280,9 @@ union bpf_attr {
*/
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
__u32 btf_fd; /* fd pointing to a BTF type data */
__u32 btf_key_id; /* BTF type_id of the key */
__u32 btf_value_id; /* BTF type_id of the value */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
......@@ -363,6 +367,14 @@ union bpf_attr {
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
struct { /* anonymous struct for BPF_BTF_LOAD */
__aligned_u64 btf;
__aligned_u64 btf_log_buf;
__u32 btf_size;
__u32 btf_log_size;
__u32 btf_log_level;
};
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
......@@ -755,6 +767,13 @@ union bpf_attr {
* @addr: pointer to struct sockaddr to bind socket to
* @addr_len: length of sockaddr structure
* Return: 0 on success or negative error code
*
* int bpf_xdp_adjust_tail(xdp_md, delta)
* Adjust the xdp_md.data_end by delta. Only shrinking of packet's
* size is supported.
* @xdp_md: pointer to xdp_md
* @delta: A negative integer to be added to xdp_md.data_end
* Return: 0 on success or negative on error
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -821,7 +840,8 @@ union bpf_attr {
FN(msg_apply_bytes), \
FN(msg_cork_bytes), \
FN(msg_pull_data), \
FN(bind),
FN(bind), \
FN(xdp_adjust_tail),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (c) 2018 Facebook */
#ifndef _UAPI__LINUX_BTF_H__
#define _UAPI__LINUX_BTF_H__
#include <linux/types.h>
#define BTF_MAGIC 0xeB9F
#define BTF_MAGIC_SWAP 0x9FeB
#define BTF_VERSION 1
#define BTF_FLAGS_COMPR 0x01
struct btf_header {
__u16 magic;
__u8 version;
__u8 flags;
__u32 parent_label;
__u32 parent_name;
/* All offsets are in bytes relative to the end of this header */
__u32 label_off; /* offset of label section */
__u32 object_off; /* offset of data object section*/
__u32 func_off; /* offset of function section */
__u32 type_off; /* offset of type section */
__u32 str_off; /* offset of string section */
__u32 str_len; /* length of string section */
};
/* Max # of type identifier */
#define BTF_MAX_TYPE 0x7fffffff
/* Max offset into the string section */
#define BTF_MAX_NAME_OFFSET 0x7fffffff
/* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN 0xffff
/* The type id is referring to a parent BTF */
#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
/* String is in the ELF string section */
#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
struct btf_type {
__u32 name;
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bits 31: root
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT and UNION.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
* "type" is a type_id referring to another type.
*/
union {
__u32 size;
__u32 type;
};
};
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80))
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_KIND_UNKN 0 /* Unknown */
#define BTF_KIND_INT 1 /* Integer */
#define BTF_KIND_PTR 2 /* Pointer */
#define BTF_KIND_ARRAY 3 /* Array */
#define BTF_KIND_STRUCT 4 /* Struct */
#define BTF_KIND_UNION 5 /* Union */
#define BTF_KIND_ENUM 6 /* Enumeration */
#define BTF_KIND_FWD 7 /* Forward */
#define BTF_KIND_TYPEDEF 8 /* Typedef */
#define BTF_KIND_VOLATILE 9 /* Volatile */
#define BTF_KIND_CONST 10 /* Const */
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_MAX 11
#define NR_BTF_KINDS 12
/* For some specific BTF_KIND, "struct btf_type" is immediately
* followed by extra data.
*/
/* BTF_KIND_INT is followed by a u32 and the following
* is the 32 bits arrangement:
*/
#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24)
#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
/* Attributes stored in the BTF_INT_ENCODING */
#define BTF_INT_SIGNED 0x1
#define BTF_INT_CHAR 0x2
#define BTF_INT_BOOL 0x4
#define BTF_INT_VARARGS 0x8
/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
* The exact number of btf_enum is stored in the vlen (of the
* info in "struct btf_type").
*/
struct btf_enum {
__u32 name;
__s32 val;
};
/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
struct btf_array {
__u32 type;
__u32 index_type;
__u32 nelems;
};
/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
* by multiple "struct btf_member". The exact number
* of btf_member is stored in the vlen (of the info in
* "struct btf_type").
*/
struct btf_member {
__u32 name;
__u32 type;
__u32 offset; /* offset in bits */
};
#endif /* _UAPI__LINUX_BTF_H__ */
......@@ -4,6 +4,7 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
......
......@@ -11,11 +11,13 @@
* General Public License for more details.
*/
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include "map_in_map.h"
......@@ -336,6 +338,52 @@ static void array_map_free(struct bpf_map *map)
bpf_map_area_free(array);
}
static void array_map_seq_show_elem(struct bpf_map *map, void *key,
struct seq_file *m)
{
void *value;
rcu_read_lock();
value = array_map_lookup_elem(map, key);
if (!value) {
rcu_read_unlock();
return;
}
seq_printf(m, "%u: ", *(u32 *)key);
btf_type_seq_show(map->btf, map->btf_value_id, value, m);
seq_puts(m, "\n");
rcu_read_unlock();
}
static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf,
u32 btf_key_id, u32 btf_value_id)
{
const struct btf_type *key_type, *value_type;
u32 key_size, value_size;
u32 int_data;
key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
return -EINVAL;
int_data = *(u32 *)(key_type + 1);
/* bpf array can only take a u32 key. This check makes
* sure that the btf matches the attr used during map_create.
*/
if (BTF_INT_BITS(int_data) != 32 || key_size != 4 ||
BTF_INT_OFFSET(int_data))
return -EINVAL;
value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
if (!value_type || value_size > map->value_size)
return -EINVAL;
return 0;
}
const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
......@@ -345,6 +393,8 @@ const struct bpf_map_ops array_map_ops = {
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_gen_lookup = array_map_gen_lookup,
.map_seq_show_elem = array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
};
const struct bpf_map_ops percpu_array_map_ops = {
......
此差异已折叠。
......@@ -150,8 +150,154 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
return 0;
}
struct map_iter {
void *key;
bool done;
};
static struct map_iter *map_iter(struct seq_file *m)
{
return m->private;
}
static struct bpf_map *seq_file_to_map(struct seq_file *m)
{
return file_inode(m->file)->i_private;
}
static void map_iter_free(struct map_iter *iter)
{
if (iter) {
kfree(iter->key);
kfree(iter);
}
}
static struct map_iter *map_iter_alloc(struct bpf_map *map)
{
struct map_iter *iter;
iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN);
if (!iter)
goto error;
iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN);
if (!iter->key)
goto error;
return iter;
error:
map_iter_free(iter);
return NULL;
}
static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
struct bpf_map *map = seq_file_to_map(m);
void *key = map_iter(m)->key;
if (map_iter(m)->done)
return NULL;
if (unlikely(v == SEQ_START_TOKEN))
goto done;
if (map->ops->map_get_next_key(map, key, key)) {
map_iter(m)->done = true;
return NULL;
}
done:
++(*pos);
return key;
}
static void *map_seq_start(struct seq_file *m, loff_t *pos)
{
if (map_iter(m)->done)
return NULL;
return *pos ? map_iter(m)->key : SEQ_START_TOKEN;
}
static void map_seq_stop(struct seq_file *m, void *v)
{
}
static int map_seq_show(struct seq_file *m, void *v)
{
struct bpf_map *map = seq_file_to_map(m);
void *key = map_iter(m)->key;
if (unlikely(v == SEQ_START_TOKEN)) {
seq_puts(m, "# WARNING!! The output is for debug purpose only\n");
seq_puts(m, "# WARNING!! The output format will change\n");
} else {
map->ops->map_seq_show_elem(map, key, m);
}
return 0;
}
static const struct seq_operations bpffs_map_seq_ops = {
.start = map_seq_start,
.next = map_seq_next,
.show = map_seq_show,
.stop = map_seq_stop,
};
static int bpffs_map_open(struct inode *inode, struct file *file)
{
struct bpf_map *map = inode->i_private;
struct map_iter *iter;
struct seq_file *m;
int err;
iter = map_iter_alloc(map);
if (!iter)
return -ENOMEM;
err = seq_open(file, &bpffs_map_seq_ops);
if (err) {
map_iter_free(iter);
return err;
}
m = file->private_data;
m->private = iter;
return 0;
}
static int bpffs_map_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
map_iter_free(map_iter(m));
return seq_release(inode, file);
}
/* bpffs_map_fops should only implement the basic
* read operation for a BPF map. The purpose is to
* provide a simple user intuitive way to do
* "cat bpffs/pathto/a-pinned-map".
*
* Other operations (e.g. write, lookup...) should be realized by
* the userspace tools (e.g. bpftool) through the
* BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update
* interface.
*/
static const struct file_operations bpffs_map_fops = {
.open = bpffs_map_open,
.read = seq_read,
.release = bpffs_map_release,
};
static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
const struct inode_operations *iops)
const struct inode_operations *iops,
const struct file_operations *fops)
{
struct inode *dir = dentry->d_parent->d_inode;
struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
......@@ -159,6 +305,7 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
return PTR_ERR(inode);
inode->i_op = iops;
inode->i_fop = fops;
inode->i_private = raw;
bpf_dentry_finalize(dentry, inode, dir);
......@@ -167,12 +314,15 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg)
{
return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops);
return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, NULL);
}
static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
{
return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops);
struct bpf_map *map = arg;
return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops,
map->btf ? &bpffs_map_fops : NULL);
}
static struct dentry *
......
......@@ -11,6 +11,7 @@
*/
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
......@@ -26,6 +27,7 @@
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/btf.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
......@@ -250,6 +252,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
bpf_map_uncharge_memlock(map);
security_bpf_map_free(map);
btf_put(map->btf);
/* implementation dependent freeing */
map->ops->map_free(map);
}
......@@ -415,7 +418,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
return 0;
}
#define BPF_MAP_CREATE_LAST_FIELD map_ifindex
#define BPF_MAP_CREATE_LAST_FIELD btf_value_id
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
......@@ -449,6 +452,33 @@ static int map_create(union bpf_attr *attr)
atomic_set(&map->refcnt, 1);
atomic_set(&map->usercnt, 1);
if (bpf_map_support_seq_show(map) &&
(attr->btf_key_id || attr->btf_value_id)) {
struct btf *btf;
if (!attr->btf_key_id || !attr->btf_value_id) {
err = -EINVAL;
goto free_map_nouncharge;
}
btf = btf_get_by_fd(attr->btf_fd);
if (IS_ERR(btf)) {
err = PTR_ERR(btf);
goto free_map_nouncharge;
}
err = map->ops->map_check_btf(map, btf, attr->btf_key_id,
attr->btf_value_id);
if (err) {
btf_put(btf);
goto free_map_nouncharge;
}
map->btf = btf;
map->btf_key_id = attr->btf_key_id;
map->btf_value_id = attr->btf_value_id;
}
err = security_bpf_map_alloc(map);
if (err)
goto free_map_nouncharge;
......@@ -481,6 +511,7 @@ static int map_create(union bpf_attr *attr)
free_map_sec:
security_bpf_map_free(map);
free_map_nouncharge:
btf_put(map->btf);
map->ops->map_free(map);
return err;
}
......@@ -2016,6 +2047,8 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
else if (f.file->f_op == &bpf_map_fops)
err = bpf_map_get_info_by_fd(f.file->private_data, attr,
uattr);
else if (f.file->f_op == &btf_fops)
err = btf_get_info_by_fd(f.file->private_data, attr, uattr);
else
err = -EINVAL;
......@@ -2023,6 +2056,19 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
return err;
}
#define BPF_BTF_LOAD_LAST_FIELD btf_log_level
static int bpf_btf_load(const union bpf_attr *attr)
{
if (CHECK_ATTR(BPF_BTF_LOAD))
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
return btf_new_fd(attr);
}
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
......@@ -2103,6 +2149,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_RAW_TRACEPOINT_OPEN:
err = bpf_raw_tracepoint_open(&attr);
break;
case BPF_BTF_LOAD:
err = bpf_btf_load(&attr);
break;
default:
err = -EINVAL;
break;
......
......@@ -170,7 +170,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
xdp.rxq = &rxqueue->xdp_rxq;
retval = bpf_test_run(prog, &xdp, repeat, &duration);
if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN)
if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
xdp.data_end != xdp.data + size)
size = xdp.data_end - xdp.data;
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
kfree(data);
......
......@@ -3997,9 +3997,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
struct netdev_rx_queue *rxqueue;
void *orig_data, *orig_data_end;
u32 metalen, act = XDP_DROP;
struct xdp_buff xdp;
void *orig_data;
int hlen, off;
u32 mac_len;
......@@ -4038,6 +4038,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + hlen;
xdp.data_hard_start = skb->data - skb_headroom(skb);
orig_data_end = xdp.data_end;
orig_data = xdp.data;
rxqueue = netif_get_rxqueue(skb);
......@@ -4052,6 +4053,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
__skb_push(skb, -off);
skb->mac_header += off;
/* check if bpf_xdp_adjust_tail was used. it can only "shrink"
* pckt.
*/
off = orig_data_end - xdp.data_end;
if (off != 0)
skb_set_tail_pointer(skb, xdp.data_end - xdp.data);
switch (act) {
case XDP_REDIRECT:
case XDP_TX:
......
......@@ -2694,20 +2694,13 @@ BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
{
void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
unsigned long metalen = xdp_get_metalen(xdp);
void *data_start = xdp->data_hard_start + metalen;
void *data_start = xdp_frame_end + metalen;
void *data = xdp->data + offset;
if (unlikely(data < data_start ||
data > xdp->data_end - ETH_HLEN))
return -EINVAL;
/* Avoid info leak, when reusing area prev used by xdp_frame */
if (data < xdp_frame_end) {
unsigned long clearlen = xdp_frame_end - data;
memset(data, 0, clearlen);
}
if (metalen)
memmove(xdp->data_meta + offset,
xdp->data_meta, metalen);
......@@ -2725,14 +2718,39 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
void *data_end = xdp->data_end + offset;
/* only shrinking is allowed for now. */
if (unlikely(offset >= 0))
return -EINVAL;
if (unlikely(data_end < xdp->data + ETH_HLEN))
return -EINVAL;
xdp->data_end = data_end;
return 0;
}
static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
.func = bpf_xdp_adjust_tail,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
};
BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
{
void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
void *meta = xdp->data_meta + offset;
unsigned long metalen = xdp->data - meta;
if (xdp_data_meta_unsupported(xdp))
return -ENOTSUPP;
if (unlikely(meta < xdp->data_hard_start ||
if (unlikely(meta < xdp_frame_end ||
meta > xdp->data))
return -EINVAL;
if (unlikely((metalen & (sizeof(__u32) - 1)) ||
......@@ -3074,7 +3092,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head ||
func == bpf_xdp_adjust_meta ||
func == bpf_msg_pull_data)
func == bpf_msg_pull_data ||
func == bpf_xdp_adjust_tail)
return true;
return false;
......@@ -3888,6 +3907,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_redirect_proto;
case BPF_FUNC_redirect_map:
return &bpf_xdp_redirect_map_proto;
case BPF_FUNC_xdp_adjust_tail:
return &bpf_xdp_adjust_tail_proto;
default:
return bpf_base_func_proto(func_id);
}
......
......@@ -44,6 +44,7 @@ hostprogs-y += xdp_monitor
hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp
hostprogs-y += cpustat
hostprogs-y += xdp_adjust_tail
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
......@@ -95,6 +96,7 @@ xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
......@@ -148,6 +150,7 @@ always += xdp_rxq_info_kern.o
always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o
always += cpustat_kern.o
always += xdp_adjust_tail_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
......@@ -193,6 +196,7 @@ HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_xdp_rxq_info += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
HOSTLOADLIBES_cpustat += -lelf
HOSTLOADLIBES_xdp_adjust_tail += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
......
......@@ -9,10 +9,10 @@
* if (value)
* (*(u64*)value) += 1;
*
* - attaches this program to eth0 raw socket
* - attaches this program to loopback interface "lo" raw socket
*
* - every second user space reads map[tcp], map[udp], map[icmp] to see
* how many packets of given protocol were seen on eth0
* how many packets of given protocol were seen on "lo"
*/
#include <stdio.h>
#include <unistd.h>
......
/* SPDX-License-Identifier: GPL-2.0
* Copyright (c) 2018 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program shows how to use bpf_xdp_adjust_tail() by
* generating ICMPv4 "packet to big" (unreachable/ df bit set frag needed
* to be more preice in case of v4)" where receiving packets bigger then
* 600 bytes.
*/
#define KBUILD_MODNAME "foo"
#include <uapi/linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include "bpf_helpers.h"
#define DEFAULT_TTL 64
#define MAX_PCKT_SIZE 600
#define ICMP_TOOBIG_SIZE 98
#define ICMP_TOOBIG_PAYLOAD_SIZE 92
struct bpf_map_def SEC("maps") icmpcnt = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.max_entries = 1,
};
static __always_inline void count_icmp(void)
{
u64 key = 0;
u64 *icmp_count;
icmp_count = bpf_map_lookup_elem(&icmpcnt, &key);
if (icmp_count)
*icmp_count += 1;
}
static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth)
{
struct ethhdr *eth;
eth = data;
memcpy(eth->h_source, orig_eth->h_dest, ETH_ALEN);
memcpy(eth->h_dest, orig_eth->h_source, ETH_ALEN);
eth->h_proto = orig_eth->h_proto;
}
static __always_inline __u16 csum_fold_helper(__u32 csum)
{
return ~((csum & 0xffff) + (csum >> 16));
}
static __always_inline void ipv4_csum(void *data_start, int data_size,
__u32 *csum)
{
*csum = bpf_csum_diff(0, 0, data_start, data_size, *csum);
*csum = csum_fold_helper(*csum);
}
static __always_inline int send_icmp4_too_big(struct xdp_md *xdp)
{
int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr);
if (bpf_xdp_adjust_head(xdp, 0 - headroom))
return XDP_DROP;
void *data = (void *)(long)xdp->data;
void *data_end = (void *)(long)xdp->data_end;
if (data + (ICMP_TOOBIG_SIZE + headroom) > data_end)
return XDP_DROP;
struct iphdr *iph, *orig_iph;
struct icmphdr *icmp_hdr;
struct ethhdr *orig_eth;
__u32 csum = 0;
__u64 off = 0;
orig_eth = data + headroom;
swap_mac(data, orig_eth);
off += sizeof(struct ethhdr);
iph = data + off;
off += sizeof(struct iphdr);
icmp_hdr = data + off;
off += sizeof(struct icmphdr);
orig_iph = data + off;
icmp_hdr->type = ICMP_DEST_UNREACH;
icmp_hdr->code = ICMP_FRAG_NEEDED;
icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr));
icmp_hdr->checksum = 0;
ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum);
icmp_hdr->checksum = csum;
iph->ttl = DEFAULT_TTL;
iph->daddr = orig_iph->saddr;
iph->saddr = orig_iph->daddr;
iph->version = 4;
iph->ihl = 5;
iph->protocol = IPPROTO_ICMP;
iph->tos = 0;
iph->tot_len = htons(
ICMP_TOOBIG_SIZE + headroom - sizeof(struct ethhdr));
iph->check = 0;
csum = 0;
ipv4_csum(iph, sizeof(struct iphdr), &csum);
iph->check = csum;
count_icmp();
return XDP_TX;
}
static __always_inline int handle_ipv4(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
int pckt_size = data_end - data;
int offset;
if (pckt_size > MAX_PCKT_SIZE) {
offset = pckt_size - ICMP_TOOBIG_SIZE;
if (bpf_xdp_adjust_tail(xdp, 0 - offset))
return XDP_PASS;
return send_icmp4_too_big(xdp);
}
return XDP_PASS;
}
SEC("xdp_icmp")
int _xdp_icmp(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
struct ethhdr *eth = data;
__u16 h_proto;
if (eth + 1 > data_end)
return XDP_DROP;
h_proto = eth->h_proto;
if (h_proto == htons(ETH_P_IP))
return handle_ipv4(xdp);
else
return XDP_PASS;
}
char _license[] SEC("license") = "GPL";
/* SPDX-License-Identifier: GPL-2.0
* Copyright (c) 2018 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <arpa/inet.h>
#include <netinet/ether.h>
#include <unistd.h>
#include <time.h>
#include "bpf_load.h"
#include "libbpf.h"
#include "bpf_util.h"
#define STATS_INTERVAL_S 2U
static int ifindex = -1;
static __u32 xdp_flags;
static void int_exit(int sig)
{
if (ifindex > -1)
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
exit(0);
}
/* simple "icmp packet too big sent" counter
*/
static void poll_stats(unsigned int kill_after_s)
{
time_t started_at = time(NULL);
__u64 value = 0;
int key = 0;
while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
sleep(STATS_INTERVAL_S);
assert(bpf_map_lookup_elem(map_fd[0], &key, &value) == 0);
printf("icmp \"packet too big\" sent: %10llu pkts\n", value);
}
}
static void usage(const char *cmd)
{
printf("Start a XDP prog which send ICMP \"packet too big\" \n"
"messages if ingress packet is bigger then MAX_SIZE bytes\n");
printf("Usage: %s [...]\n", cmd);
printf(" -i <ifindex> Interface Index\n");
printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n");
printf(" -S use skb-mode\n");
printf(" -N enforce native mode\n");
printf(" -h Display this help\n");
}
int main(int argc, char **argv)
{
unsigned char opt_flags[256] = {};
unsigned int kill_after_s = 0;
const char *optstr = "i:T:SNh";
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char filename[256];
int opt;
int i;
for (i = 0; i < strlen(optstr); i++)
if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
opt_flags[(unsigned char)optstr[i]] = 1;
while ((opt = getopt(argc, argv, optstr)) != -1) {
switch (opt) {
case 'i':
ifindex = atoi(optarg);
break;
case 'T':
kill_after_s = atoi(optarg);
break;
case 'S':
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
xdp_flags |= XDP_FLAGS_DRV_MODE;
break;
default:
usage(argv[0]);
return 1;
}
opt_flags[opt] = 0;
}
for (i = 0; i < strlen(optstr); i++) {
if (opt_flags[(unsigned int)optstr[i]]) {
fprintf(stderr, "Missing argument -%c\n", optstr[i]);
usage(argv[0]);
return 1;
}
}
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
return 1;
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
}
if (!prog_fd[0]) {
printf("load_bpf_file: %s\n", strerror(errno));
return 1;
}
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
poll_stats(kill_after_s);
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
return 0;
}
......@@ -330,7 +330,7 @@ static void stats_print(struct stats_record *stats_rec,
pps = calc_pps_u64(r, p, t);
if (pps > 0)
printf(fmt1, "Exception", i,
0.0, pps, err2str(rec_i));
0.0, pps, action2str(rec_i));
}
pps = calc_pps_u64(&rec->total, &prev->total, t);
if (pps > 0)
......
......@@ -26,7 +26,8 @@ MAP COMMANDS
| **bpftool** **cgroup help**
|
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** }
| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
......@@ -63,7 +64,13 @@ DESCRIPTION
**egress** egress path of the inet socket (since 4.10);
**sock_create** opening of an inet socket (since 4.10);
**sock_ops** various socket operations (since 4.12);
**device** device access (since 4.15).
**device** device access (since 4.15);
**bind4** call to bind(2) for an inet4 socket (since 4.17);
**bind6** call to bind(2) for an inet6 socket (since 4.17);
**post_bind4** return from bind(2) for an inet4 socket (since 4.17);
**post_bind6** return from bind(2) for an inet6 socket (since 4.17);
**connect4** call to connect(2) for an inet4 socket (since 4.17);
**connect6** call to connect(2) for an inet6 socket (since 4.17).
**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
Detach *PROG* from the cgroup *CGROUP* and attach type
......
......@@ -23,10 +23,10 @@ MAP COMMANDS
| **bpftool** **map { show | list }** [*MAP*]
| **bpftool** **map dump** *MAP*
| **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*]
| **bpftool** **map lookup** *MAP* **key** *BYTES*
| **bpftool** **map getnext** *MAP* [**key** *BYTES*]
| **bpftool** **map delete** *MAP* **key** *BYTES*
| **bpftool** **map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
| **bpftool** **map lookup** *MAP* **key** [**hex**] *BYTES*
| **bpftool** **map getnext** *MAP* [**key** [**hex**] *BYTES*]
| **bpftool** **map delete** *MAP* **key** [**hex**] *BYTES*
| **bpftool** **map pin** *MAP* *FILE*
| **bpftool** **map help**
|
......@@ -48,20 +48,26 @@ DESCRIPTION
**bpftool map dump** *MAP*
Dump all entries in a given *MAP*.
**bpftool map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*]
**bpftool map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
Update map entry for a given *KEY*.
*UPDATE_FLAGS* can be one of: **any** update existing entry
or add if doesn't exit; **exist** update only if entry already
exists; **noexist** update only if entry doesn't exist.
**bpftool map lookup** *MAP* **key** *BYTES*
If the **hex** keyword is provided in front of the bytes
sequence, the bytes are parsed as hexadeximal values, even if
no "0x" prefix is added. If the keyword is not provided, then
the bytes are parsed as decimal values, unless a "0x" prefix
(for hexadecimal) or a "0" prefix (for octal) is provided.
**bpftool map lookup** *MAP* **key** [**hex**] *BYTES*
Lookup **key** in the map.
**bpftool map getnext** *MAP* [**key** *BYTES*]
**bpftool map getnext** *MAP* [**key** [**hex**] *BYTES*]
Get next key. If *key* is not specified, get first key.
**bpftool map delete** *MAP* **key** *BYTES*
**bpftool map delete** *MAP* **key** [**hex**] *BYTES*
Remove entry from the map.
**bpftool map pin** *MAP* *FILE*
......@@ -98,7 +104,12 @@ EXAMPLES
10: hash name some_map flags 0x0
key 4B value 8B max_entries 2048 memlock 167936B
**# bpftool map update id 10 key 13 00 07 00 value 02 00 00 00 01 02 03 04**
The following three commands are equivalent:
|
| **# bpftool map update id 10 key hex 20 c4 b7 00 value hex 0f ff ff ab 01 02 03 4c**
| **# bpftool map update id 10 key 0x20 0xc4 0xb7 0x00 value 0x0f 0xff 0xff 0xab 0x01 0x02 0x03 0x4c**
| **# bpftool map update id 10 key 32 196 183 0 value 15 255 255 171 1 2 3 76**
**# bpftool map lookup id 10 key 0 1 2 3**
......
......@@ -147,7 +147,7 @@ _bpftool()
# Deal with simplest keywords
case $prev in
help|key|opcodes|visual)
help|hex|opcodes|visual)
return 0
;;
tag)
......@@ -283,7 +283,7 @@ _bpftool()
return 0
;;
key)
return 0
COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
;;
*)
_bpftool_once_attr 'key'
......@@ -302,7 +302,7 @@ _bpftool()
return 0
;;
key)
return 0
COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
;;
value)
# We can have bytes, or references to a prog or a
......@@ -321,6 +321,8 @@ _bpftool()
return 0
;;
*)
COMPREPLY+=( $( compgen -W 'hex' \
-- "$cur" ) )
return 0
;;
esac
......@@ -372,7 +374,8 @@ _bpftool()
;;
attach|detach)
local ATTACH_TYPES='ingress egress sock_create sock_ops \
device'
device bind4 bind6 post_bind4 post_bind6 connect4 \
connect6'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag'
case $prev in
......@@ -380,7 +383,8 @@ _bpftool()
_filedir
return 0
;;
ingress|egress|sock_create|sock_ops|device)
ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
post_bind4|post_bind6|connect4|connect6)
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
......
......@@ -17,7 +17,10 @@
"ATTACH_FLAGS := { multi | override }"
#define HELP_SPEC_ATTACH_TYPES \
"ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }"
" ATTACH_TYPE := { ingress | egress | sock_create |\n" \
" sock_ops | device | bind4 | bind6 |\n" \
" post_bind4 | post_bind6 | connect4 |\n" \
" connect6 }"
static const char * const attach_type_strings[] = {
[BPF_CGROUP_INET_INGRESS] = "ingress",
......@@ -25,6 +28,12 @@ static const char * const attach_type_strings[] = {
[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
[BPF_CGROUP_SOCK_OPS] = "sock_ops",
[BPF_CGROUP_DEVICE] = "device",
[BPF_CGROUP_INET4_BIND] = "bind4",
[BPF_CGROUP_INET6_BIND] = "bind6",
[BPF_CGROUP_INET4_CONNECT] = "connect4",
[BPF_CGROUP_INET6_CONNECT] = "connect6",
[BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
[__MAX_BPF_ATTACH_TYPE] = NULL,
};
......@@ -282,7 +291,7 @@ static int do_help(int argc, char **argv)
" %s %s detach CGROUP ATTACH_TYPE PROG\n"
" %s %s help\n"
"\n"
" " HELP_SPEC_ATTACH_TYPES "\n"
HELP_SPEC_ATTACH_TYPES "\n"
" " HELP_SPEC_ATTACH_FLAGS "\n"
" " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"
......
......@@ -283,11 +283,16 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
static char **parse_bytes(char **argv, const char *name, unsigned char *val,
unsigned int n)
{
unsigned int i = 0;
unsigned int i = 0, base = 0;
char *endptr;
if (is_prefix(*argv, "hex")) {
base = 16;
argv++;
}
while (i < n && argv[i]) {
val[i] = strtoul(argv[i], &endptr, 0);
val[i] = strtoul(argv[i], &endptr, base);
if (*endptr) {
p_err("error parsing byte: %s", argv[i]);
return NULL;
......@@ -869,10 +874,10 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %s %s { show | list } [MAP]\n"
" %s %s dump MAP\n"
" %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n"
" %s %s lookup MAP key BYTES\n"
" %s %s getnext MAP [key BYTES]\n"
" %s %s delete MAP key BYTES\n"
" %s %s update MAP key [hex] BYTES value [hex] VALUE [UPDATE_FLAGS]\n"
" %s %s lookup MAP key [hex] BYTES\n"
" %s %s getnext MAP [key [hex] BYTES]\n"
" %s %s delete MAP key [hex] BYTES\n"
" %s %s pin MAP FILE\n"
" %s %s help\n"
"\n"
......
......@@ -68,6 +68,9 @@ static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
[BPF_PROG_TYPE_SK_SKB] = "sk_skb",
[BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
[BPF_PROG_TYPE_SK_MSG] = "sk_msg",
[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
};
static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
......
......@@ -95,6 +95,7 @@ enum bpf_cmd {
BPF_OBJ_GET_INFO_BY_FD,
BPF_PROG_QUERY,
BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD,
};
enum bpf_map_type {
......@@ -279,6 +280,9 @@ union bpf_attr {
*/
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
__u32 btf_fd; /* fd pointing to a BTF type data */
__u32 btf_key_id; /* BTF type_id of the key */
__u32 btf_value_id; /* BTF type_id of the value */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
......@@ -363,6 +367,14 @@ union bpf_attr {
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
struct { /* anonymous struct for BPF_BTF_LOAD */
__aligned_u64 btf;
__aligned_u64 btf_log_buf;
__u32 btf_size;
__u32 btf_log_size;
__u32 btf_log_level;
};
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
......@@ -755,6 +767,13 @@ union bpf_attr {
* @addr: pointer to struct sockaddr to bind socket to
* @addr_len: length of sockaddr structure
* Return: 0 on success or negative error code
*
* int bpf_xdp_adjust_tail(xdp_md, delta)
* Adjust the xdp_md.data_end by delta. Only shrinking of packet's
* size is supported.
* @xdp_md: pointer to xdp_md
* @delta: A negative integer to be added to xdp_md.data_end
* Return: 0 on success or negative on error
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -821,7 +840,8 @@ union bpf_attr {
FN(msg_apply_bytes), \
FN(msg_cork_bytes), \
FN(msg_pull_data), \
FN(bind),
FN(bind), \
FN(xdp_adjust_tail),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (c) 2018 Facebook */
#ifndef _UAPI__LINUX_BTF_H__
#define _UAPI__LINUX_BTF_H__
#include <linux/types.h>
#define BTF_MAGIC 0xeB9F
#define BTF_MAGIC_SWAP 0x9FeB
#define BTF_VERSION 1
#define BTF_FLAGS_COMPR 0x01
struct btf_header {
__u16 magic;
__u8 version;
__u8 flags;
__u32 parent_label;
__u32 parent_name;
/* All offsets are in bytes relative to the end of this header */
__u32 label_off; /* offset of label section */
__u32 object_off; /* offset of data object section*/
__u32 func_off; /* offset of function section */
__u32 type_off; /* offset of type section */
__u32 str_off; /* offset of string section */
__u32 str_len; /* length of string section */
};
/* Max # of type identifier */
#define BTF_MAX_TYPE 0x7fffffff
/* Max offset into the string section */
#define BTF_MAX_NAME_OFFSET 0x7fffffff
/* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN 0xffff
/* The type id is referring to a parent BTF */
#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
/* String is in the ELF string section */
#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
struct btf_type {
__u32 name;
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bits 31: root
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT and UNION.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
* "type" is a type_id referring to another type.
*/
union {
__u32 size;
__u32 type;
};
};
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80))
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_KIND_UNKN 0 /* Unknown */
#define BTF_KIND_INT 1 /* Integer */
#define BTF_KIND_PTR 2 /* Pointer */
#define BTF_KIND_ARRAY 3 /* Array */
#define BTF_KIND_STRUCT 4 /* Struct */
#define BTF_KIND_UNION 5 /* Union */
#define BTF_KIND_ENUM 6 /* Enumeration */
#define BTF_KIND_FWD 7 /* Forward */
#define BTF_KIND_TYPEDEF 8 /* Typedef */
#define BTF_KIND_VOLATILE 9 /* Volatile */
#define BTF_KIND_CONST 10 /* Const */
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_MAX 11
#define NR_BTF_KINDS 12
/* For some specific BTF_KIND, "struct btf_type" is immediately
* followed by extra data.
*/
/* BTF_KIND_INT is followed by a u32 and the following
* is the 32 bits arrangement:
*/
#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24)
#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
/* Attributes stored in the BTF_INT_ENCODING */
#define BTF_INT_SIGNED 0x1
#define BTF_INT_CHAR 0x2
#define BTF_INT_BOOL 0x4
#define BTF_INT_VARARGS 0x8
/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
* The exact number of btf_enum is stored in the vlen (of the
* info in "struct btf_type").
*/
struct btf_enum {
__u32 name;
__s32 val;
};
/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
struct btf_array {
__u32 type;
__u32 index_type;
__u32 nelems;
};
/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
* by multiple "struct btf_member". The exact number
* of btf_member is stored in the vlen (of the info in
* "struct btf_type").
*/
struct btf_member {
__u32 name;
__u32 type;
__u32 offset; /* offset in bits */
};
#endif /* _UAPI__LINUX_BTF_H__ */
libbpf-y := libbpf.o bpf.o nlattr.o
libbpf-y := libbpf.o bpf.o nlattr.o btf.o
......@@ -73,43 +73,76 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
return syscall(__NR_bpf, cmd, attr, size);
}
int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
int key_size, int value_size, int max_entries,
__u32 map_flags, int node)
int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
{
__u32 name_len = name ? strlen(name) : 0;
__u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
union bpf_attr attr;
memset(&attr, '\0', sizeof(attr));
attr.map_type = map_type;
attr.key_size = key_size;
attr.value_size = value_size;
attr.max_entries = max_entries;
attr.map_flags = map_flags;
memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
attr.map_type = create_attr->map_type;
attr.key_size = create_attr->key_size;
attr.value_size = create_attr->value_size;
attr.max_entries = create_attr->max_entries;
attr.map_flags = create_attr->map_flags;
memcpy(attr.map_name, create_attr->name,
min(name_len, BPF_OBJ_NAME_LEN - 1));
attr.numa_node = create_attr->numa_node;
attr.btf_fd = create_attr->btf_fd;
attr.btf_key_id = create_attr->btf_key_id;
attr.btf_value_id = create_attr->btf_value_id;
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
int key_size, int value_size, int max_entries,
__u32 map_flags, int node)
{
struct bpf_create_map_attr map_attr = {};
map_attr.name = name;
map_attr.map_type = map_type;
map_attr.map_flags = map_flags;
map_attr.key_size = key_size;
map_attr.value_size = value_size;
map_attr.max_entries = max_entries;
if (node >= 0) {
attr.map_flags |= BPF_F_NUMA_NODE;
attr.numa_node = node;
map_attr.numa_node = node;
map_attr.map_flags |= BPF_F_NUMA_NODE;
}
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
return bpf_create_map_xattr(&map_attr);
}
int bpf_create_map(enum bpf_map_type map_type, int key_size,
int value_size, int max_entries, __u32 map_flags)
{
return bpf_create_map_node(map_type, NULL, key_size, value_size,
max_entries, map_flags, -1);
struct bpf_create_map_attr map_attr = {};
map_attr.map_type = map_type;
map_attr.map_flags = map_flags;
map_attr.key_size = key_size;
map_attr.value_size = value_size;
map_attr.max_entries = max_entries;
return bpf_create_map_xattr(&map_attr);
}
int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
int key_size, int value_size, int max_entries,
__u32 map_flags)
{
return bpf_create_map_node(map_type, name, key_size, value_size,
max_entries, map_flags, -1);
struct bpf_create_map_attr map_attr = {};
map_attr.name = name;
map_attr.map_type = map_type;
map_attr.map_flags = map_flags;
map_attr.key_size = key_size;
map_attr.value_size = value_size;
map_attr.max_entries = max_entries;
return bpf_create_map_xattr(&map_attr);
}
int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
......@@ -573,3 +606,28 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
close(sock);
return ret;
}
int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
bool do_log)
{
union bpf_attr attr = {};
int fd;
attr.btf = ptr_to_u64(btf);
attr.btf_size = btf_size;
retry:
if (do_log && log_buf && log_buf_size) {
attr.btf_log_level = 1;
attr.btf_log_size = log_buf_size;
attr.btf_log_buf = ptr_to_u64(log_buf);
}
fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
if (fd == -1 && !do_log && log_buf && log_buf_size) {
do_log = true;
goto retry;
}
return fd;
}
......@@ -24,8 +24,23 @@
#define __BPF_BPF_H
#include <linux/bpf.h>
#include <stdbool.h>
#include <stddef.h>
struct bpf_create_map_attr {
const char *name;
enum bpf_map_type map_type;
__u32 map_flags;
__u32 key_size;
__u32 value_size;
__u32 max_entries;
__u32 numa_node;
__u32 btf_fd;
__u32 btf_key_id;
__u32 btf_value_id;
};
int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
int key_size, int value_size, int max_entries,
__u32 map_flags, int node);
......@@ -87,4 +102,6 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
__u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
int bpf_raw_tracepoint_open(const char *name, int prog_fd);
int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
bool do_log);
#endif
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2018 Facebook */
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <linux/err.h>
#include <linux/btf.h>
#include "btf.h"
#include "bpf.h"
#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); }
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
#define BTF_MAX_NR_TYPES 65535
static struct btf_type btf_void;
struct btf {
union {
struct btf_header *hdr;
void *data;
};
struct btf_type **types;
const char *strings;
void *nohdr_data;
uint32_t nr_types;
uint32_t types_size;
uint32_t data_size;
int fd;
};
static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset)
{
if (!BTF_STR_TBL_ELF_ID(offset) &&
BTF_STR_OFFSET(offset) < btf->hdr->str_len)
return &btf->strings[BTF_STR_OFFSET(offset)];
else
return NULL;
}
static int btf_add_type(struct btf *btf, struct btf_type *t)
{
if (btf->types_size - btf->nr_types < 2) {
struct btf_type **new_types;
u32 expand_by, new_size;
if (btf->types_size == BTF_MAX_NR_TYPES)
return -E2BIG;
expand_by = max(btf->types_size >> 2, 16);
new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
new_types = realloc(btf->types, sizeof(*new_types) * new_size);
if (!new_types)
return -ENOMEM;
if (btf->nr_types == 0)
new_types[0] = &btf_void;
btf->types = new_types;
btf->types_size = new_size;
}
btf->types[++(btf->nr_types)] = t;
return 0;
}
static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log)
{
const struct btf_header *hdr = btf->hdr;
u32 meta_left;
if (btf->data_size < sizeof(struct btf_header)) {
elog("BTF header not found\n");
return -EINVAL;
}
if (hdr->magic != BTF_MAGIC) {
elog("Invalid BTF magic:%x\n", hdr->magic);
return -EINVAL;
}
if (hdr->version != BTF_VERSION) {
elog("Unsupported BTF version:%u\n", hdr->version);
return -ENOTSUP;
}
if (hdr->flags) {
elog("Unsupported BTF flags:%x\n", hdr->flags);
return -ENOTSUP;
}
meta_left = btf->data_size - sizeof(*hdr);
if (!meta_left) {
elog("BTF has no data\n");
return -EINVAL;
}
if (meta_left < hdr->type_off) {
elog("Invalid BTF type section offset:%u\n", hdr->type_off);
return -EINVAL;
}
if (meta_left < hdr->str_off) {
elog("Invalid BTF string section offset:%u\n", hdr->str_off);
return -EINVAL;
}
if (hdr->type_off >= hdr->str_off) {
elog("BTF type section offset >= string section offset. No type?\n");
return -EINVAL;
}
if (hdr->type_off & 0x02) {
elog("BTF type section is not aligned to 4 bytes\n");
return -EINVAL;
}
btf->nohdr_data = btf->hdr + 1;
return 0;
}
static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log)
{
const struct btf_header *hdr = btf->hdr;
const char *start = btf->nohdr_data + hdr->str_off;
const char *end = start + btf->hdr->str_len;
if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
start[0] || end[-1]) {
elog("Invalid BTF string section\n");
return -EINVAL;
}
btf->strings = start;
return 0;
}
static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
{
struct btf_header *hdr = btf->hdr;
void *nohdr_data = btf->nohdr_data;
void *next_type = nohdr_data + hdr->type_off;
void *end_type = nohdr_data + hdr->str_off;
while (next_type < end_type) {
struct btf_type *t = next_type;
uint16_t vlen = BTF_INFO_VLEN(t->info);
int err;
next_type += sizeof(*t);
switch (BTF_INFO_KIND(t->info)) {
case BTF_KIND_INT:
next_type += sizeof(int);
break;
case BTF_KIND_ARRAY:
next_type += sizeof(struct btf_array);
break;
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
next_type += vlen * sizeof(struct btf_member);
break;
case BTF_KIND_ENUM:
next_type += vlen * sizeof(struct btf_enum);
break;
case BTF_KIND_TYPEDEF:
case BTF_KIND_PTR:
case BTF_KIND_FWD:
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
break;
default:
elog("Unsupported BTF_KIND:%u\n",
BTF_INFO_KIND(t->info));
return -EINVAL;
}
err = btf_add_type(btf, t);
if (err)
return err;
}
return 0;
}
static const struct btf_type *btf_type_by_id(const struct btf *btf,
uint32_t type_id)
{
if (type_id > btf->nr_types)
return NULL;
return btf->types[type_id];
}
static bool btf_type_is_void(const struct btf_type *t)
{
return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
}
static bool btf_type_is_void_or_null(const struct btf_type *t)
{
return !t || btf_type_is_void(t);
}
static int64_t btf_type_size(const struct btf_type *t)
{
switch (BTF_INFO_KIND(t->info)) {
case BTF_KIND_INT:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
return t->size;
case BTF_KIND_PTR:
return sizeof(void *);
default:
return -EINVAL;
}
}
#define MAX_RESOLVE_DEPTH 32
int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id)
{
const struct btf_array *array;
const struct btf_type *t;
uint32_t nelems = 1;
int64_t size = -1;
int i;
t = btf_type_by_id(btf, type_id);
for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
i++) {
size = btf_type_size(t);
if (size >= 0)
break;
switch (BTF_INFO_KIND(t->info)) {
case BTF_KIND_TYPEDEF:
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
type_id = t->type;
break;
case BTF_KIND_ARRAY:
array = (const struct btf_array *)(t + 1);
if (nelems && array->nelems > UINT32_MAX / nelems)
return -E2BIG;
nelems *= array->nelems;
type_id = array->type;
break;
default:
return -EINVAL;
}
t = btf_type_by_id(btf, type_id);
}
if (size < 0)
return -EINVAL;
if (nelems && size > UINT32_MAX / nelems)
return -E2BIG;
return nelems * size;
}
int32_t btf__find_by_name(const struct btf *btf, const char *type_name)
{
uint32_t i;
if (!strcmp(type_name, "void"))
return 0;
for (i = 1; i <= btf->nr_types; i++) {
const struct btf_type *t = btf->types[i];
const char *name = btf_name_by_offset(btf, t->name);
if (name && !strcmp(type_name, name))
return i;
}
return -ENOENT;
}
void btf__free(struct btf *btf)
{
if (!btf)
return;
if (btf->fd != -1)
close(btf->fd);
free(btf->data);
free(btf->types);
free(btf);
}
struct btf *btf__new(uint8_t *data, uint32_t size,
btf_print_fn_t err_log)
{
uint32_t log_buf_size = 0;
char *log_buf = NULL;
struct btf *btf;
int err;
btf = calloc(1, sizeof(struct btf));
if (!btf)
return ERR_PTR(-ENOMEM);
btf->fd = -1;
if (err_log) {
log_buf = malloc(BPF_LOG_BUF_SIZE);
if (!log_buf) {
err = -ENOMEM;
goto done;
}
*log_buf = 0;
log_buf_size = BPF_LOG_BUF_SIZE;
}
btf->data = malloc(size);
if (!btf->data) {
err = -ENOMEM;
goto done;
}
memcpy(btf->data, data, size);
btf->data_size = size;
btf->fd = bpf_load_btf(btf->data, btf->data_size,
log_buf, log_buf_size, false);
if (btf->fd == -1) {
err = -errno;
elog("Error loading BTF: %s(%d)\n", strerror(errno), errno);
if (log_buf && *log_buf)
elog("%s\n", log_buf);
goto done;
}
err = btf_parse_hdr(btf, err_log);
if (err)
goto done;
err = btf_parse_str_sec(btf, err_log);
if (err)
goto done;
err = btf_parse_type_sec(btf, err_log);
done:
free(log_buf);
if (err) {
btf__free(btf);
return ERR_PTR(err);
}
return btf;
}
int btf__fd(const struct btf *btf)
{
return btf->fd;
}
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2018 Facebook */
#ifndef __BPF_BTF_H
#define __BPF_BTF_H
#include <stdint.h>
#define BTF_ELF_SEC ".BTF"
struct btf;
typedef int (*btf_print_fn_t)(const char *, ...)
__attribute__((format(printf, 1, 2)));
void btf__free(struct btf *btf);
struct btf *btf__new(uint8_t *data, uint32_t size, btf_print_fn_t err_log);
int32_t btf__find_by_name(const struct btf *btf, const char *type_name);
int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id);
int btf__fd(const struct btf *btf);
#endif
......@@ -45,6 +45,7 @@
#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#ifndef EM_BPF
#define EM_BPF 247
......@@ -212,6 +213,8 @@ struct bpf_map {
char *name;
size_t offset;
struct bpf_map_def def;
uint32_t btf_key_id;
uint32_t btf_value_id;
void *priv;
bpf_map_clear_priv_t clear_priv;
};
......@@ -256,6 +259,8 @@ struct bpf_object {
*/
struct list_head list;
struct btf *btf;
void *priv;
bpf_object_clear_priv_t clear_priv;
......@@ -819,7 +824,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
data->d_size);
else if (strcmp(name, "maps") == 0)
obj->efile.maps_shndx = idx;
else if (sh.sh_type == SHT_SYMTAB) {
else if (strcmp(name, BTF_ELF_SEC) == 0) {
obj->btf = btf__new(data->d_buf, data->d_size,
__pr_debug);
if (IS_ERR(obj->btf)) {
pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
BTF_ELF_SEC, PTR_ERR(obj->btf));
obj->btf = NULL;
}
} else if (sh.sh_type == SHT_SYMTAB) {
if (obj->efile.symbols) {
pr_warning("bpf: multiple SYMTAB in %s\n",
obj->path);
......@@ -996,33 +1009,126 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
return 0;
}
static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
{
struct bpf_map_def *def = &map->def;
const size_t max_name = 256;
int64_t key_size, value_size;
int32_t key_id, value_id;
char name[max_name];
/* Find key type by name from BTF */
if (snprintf(name, max_name, "%s_key", map->name) == max_name) {
pr_warning("map:%s length of BTF key_type:%s_key is too long\n",
map->name, map->name);
return -EINVAL;
}
key_id = btf__find_by_name(btf, name);
if (key_id < 0) {
pr_debug("map:%s key_type:%s cannot be found in BTF\n",
map->name, name);
return key_id;
}
key_size = btf__resolve_size(btf, key_id);
if (key_size < 0) {
pr_warning("map:%s key_type:%s cannot get the BTF type_size\n",
map->name, name);
return key_size;
}
if (def->key_size != key_size) {
pr_warning("map:%s key_type:%s has BTF type_size:%ld != key_size:%u\n",
map->name, name, key_size, def->key_size);
return -EINVAL;
}
/* Find value type from BTF */
if (snprintf(name, max_name, "%s_value", map->name) == max_name) {
pr_warning("map:%s length of BTF value_type:%s_value is too long\n",
map->name, map->name);
return -EINVAL;
}
value_id = btf__find_by_name(btf, name);
if (value_id < 0) {
pr_debug("map:%s value_type:%s cannot be found in BTF\n",
map->name, name);
return value_id;
}
value_size = btf__resolve_size(btf, value_id);
if (value_size < 0) {
pr_warning("map:%s value_type:%s cannot get the BTF type_size\n",
map->name, name);
return value_size;
}
if (def->value_size != value_size) {
pr_warning("map:%s value_type:%s has BTF type_size:%ld != value_size:%u\n",
map->name, name, value_size, def->value_size);
return -EINVAL;
}
map->btf_key_id = key_id;
map->btf_value_id = value_id;
return 0;
}
static int
bpf_object__create_maps(struct bpf_object *obj)
{
struct bpf_create_map_attr create_attr = {};
unsigned int i;
int err;
for (i = 0; i < obj->nr_maps; i++) {
struct bpf_map_def *def = &obj->maps[i].def;
int *pfd = &obj->maps[i].fd;
*pfd = bpf_create_map_name(def->type,
obj->maps[i].name,
def->key_size,
def->value_size,
def->max_entries,
def->map_flags);
struct bpf_map *map = &obj->maps[i];
struct bpf_map_def *def = &map->def;
int *pfd = &map->fd;
create_attr.name = map->name;
create_attr.map_type = def->type;
create_attr.map_flags = def->map_flags;
create_attr.key_size = def->key_size;
create_attr.value_size = def->value_size;
create_attr.max_entries = def->max_entries;
create_attr.btf_fd = 0;
create_attr.btf_key_id = 0;
create_attr.btf_value_id = 0;
if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
create_attr.btf_fd = btf__fd(obj->btf);
create_attr.btf_key_id = map->btf_key_id;
create_attr.btf_value_id = map->btf_value_id;
}
*pfd = bpf_create_map_xattr(&create_attr);
if (*pfd < 0 && create_attr.btf_key_id) {
pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, strerror(errno), errno);
create_attr.btf_fd = 0;
create_attr.btf_key_id = 0;
create_attr.btf_value_id = 0;
map->btf_key_id = 0;
map->btf_value_id = 0;
*pfd = bpf_create_map_xattr(&create_attr);
}
if (*pfd < 0) {
size_t j;
int err = *pfd;
err = *pfd;
pr_warning("failed to create map (name: '%s'): %s\n",
obj->maps[i].name,
map->name,
strerror(errno));
for (j = 0; j < i; j++)
zclose(obj->maps[j].fd);
return err;
}
pr_debug("create map %s: fd=%d\n", obj->maps[i].name, *pfd);
pr_debug("create map %s: fd=%d\n", map->name, *pfd);
}
return 0;
......@@ -1641,6 +1747,7 @@ void bpf_object__close(struct bpf_object *obj)
bpf_object__elf_finish(obj);
bpf_object__unload(obj);
btf__free(obj->btf);
for (i = 0; i < obj->nr_maps; i++) {
zfree(&obj->maps[i].name);
......@@ -1692,6 +1799,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj)
return obj ? obj->kern_version : 0;
}
int bpf_object__btf_fd(const struct bpf_object *obj)
{
return obj->btf ? btf__fd(obj->btf) : -1;
}
int bpf_object__set_priv(struct bpf_object *obj, void *priv,
bpf_object_clear_priv_t clear_priv)
{
......@@ -1845,6 +1957,7 @@ BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
......@@ -1859,6 +1972,9 @@ static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
#define BPF_S_PROG_SEC(string, ptype) \
BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype)
#define BPF_SA_PROG_SEC(string, ptype) \
BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
......@@ -1874,6 +1990,7 @@ static const struct {
BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
......@@ -1889,10 +2006,13 @@ static const struct {
BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
};
#undef BPF_PROG_SEC
#undef BPF_PROG_SEC_FULL
#undef BPF_S_PROG_SEC
#undef BPF_SA_PROG_SEC
static int bpf_program__identify_section(struct bpf_program *prog)
......@@ -1929,6 +2049,16 @@ const char *bpf_map__name(struct bpf_map *map)
return map ? map->name : NULL;
}
uint32_t bpf_map__btf_key_id(const struct bpf_map *map)
{
return map ? map->btf_key_id : 0;
}
uint32_t bpf_map__btf_value_id(const struct bpf_map *map)
{
return map ? map->btf_value_id : 0;
}
int bpf_map__set_priv(struct bpf_map *map, void *priv,
bpf_map_clear_priv_t clear_priv)
{
......
......@@ -78,6 +78,7 @@ int bpf_object__load(struct bpf_object *obj);
int bpf_object__unload(struct bpf_object *obj);
const char *bpf_object__name(struct bpf_object *obj);
unsigned int bpf_object__kversion(struct bpf_object *obj);
int bpf_object__btf_fd(const struct bpf_object *obj);
struct bpf_object *bpf_object__next(struct bpf_object *prev);
#define bpf_object__for_each_safe(pos, tmp) \
......@@ -185,6 +186,7 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n);
*/
int bpf_program__set_socket_filter(struct bpf_program *prog);
int bpf_program__set_tracepoint(struct bpf_program *prog);
int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
int bpf_program__set_kprobe(struct bpf_program *prog);
int bpf_program__set_sched_cls(struct bpf_program *prog);
int bpf_program__set_sched_act(struct bpf_program *prog);
......@@ -194,6 +196,7 @@ void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
bool bpf_program__is_socket_filter(struct bpf_program *prog);
bool bpf_program__is_tracepoint(struct bpf_program *prog);
bool bpf_program__is_raw_tracepoint(struct bpf_program *prog);
bool bpf_program__is_kprobe(struct bpf_program *prog);
bool bpf_program__is_sched_cls(struct bpf_program *prog);
bool bpf_program__is_sched_act(struct bpf_program *prog);
......@@ -239,6 +242,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
int bpf_map__fd(struct bpf_map *map);
const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
const char *bpf_map__name(struct bpf_map *map);
uint32_t bpf_map__btf_key_id(const struct bpf_map *map);
uint32_t bpf_map__btf_value_id(const struct bpf_map *map);
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
int bpf_map__set_priv(struct bpf_map *map, void *priv,
......
......@@ -24,14 +24,15 @@ urandom_read: urandom_read.c
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_sock_addr
test_sock test_sock_addr test_btf
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
test_btf_haskv.o test_btf_nokv.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
......@@ -66,6 +67,8 @@ $(BPFOBJ): force
CLANG ?= clang
LLC ?= llc
LLVM_OBJCOPY ?= llvm-objcopy
BTF_PAHOLE ?= pahole
PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
......@@ -83,9 +86,26 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help |& grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help |& grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --version |& grep LLVM)
ifneq ($(BTF_LLC_PROBE),)
ifneq ($(BTF_PAHOLE_PROBE),)
ifneq ($(BTF_OBJCOPY_PROBE),)
CLANG_FLAGS += -g
LLC_FLAGS += -mattr=dwarfris
DWARF2BTF = y
endif
endif
endif
$(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
ifeq ($(DWARF2BTF),y)
$(BTF_PAHOLE) -J $@
endif
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
......@@ -96,6 +96,9 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
(void *) BPF_FUNC_msg_pull_data;
static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
(void *) BPF_FUNC_bind;
static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
(void *) BPF_FUNC_xdp_adjust_tail;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
......@@ -129,6 +132,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
(void *) BPF_FUNC_l3_csum_replace;
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
(void *) BPF_FUNC_l4_csum_replace;
static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
(void *) BPF_FUNC_csum_diff;
static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
(void *) BPF_FUNC_skb_under_cgroup;
static int (*bpf_skb_change_head)(void *, int len, int flags) =
......
/* SPDX-License-Identifier: GPL-2.0
* Copyright (c) 2018 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include "bpf_helpers.h"
int _version SEC("version") = 1;
SEC("xdp_adjust_tail")
int _xdp_adjust_tail(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
int offset = 0;
if (data_end - data == 54)
offset = 256;
else
offset = 20;
if (bpf_xdp_adjust_tail(xdp, 0 - offset))
return XDP_DROP;
return XDP_TX;
}
char _license[] SEC("license") = "GPL";
此差异已折叠。
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2018 Facebook */
#include <linux/bpf.h>
#include "bpf_helpers.h"
int _version SEC("version") = 1;
struct ipv_counts {
unsigned int v4;
unsigned int v6;
};
typedef int btf_map_key;
typedef struct ipv_counts btf_map_value;
btf_map_key dumm_key;
btf_map_value dummy_value;
struct bpf_map_def SEC("maps") btf_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(struct ipv_counts),
.max_entries = 4,
};
struct dummy_tracepoint_args {
unsigned long long pad;
struct sock *sock;
};
SEC("dummy_tracepoint")
int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
{
struct ipv_counts *counts;
int key = 0;
if (!arg->sock)
return 0;
counts = bpf_map_lookup_elem(&btf_map, &key);
if (!counts)
return 0;
counts->v6++;
return 0;
}
char _license[] SEC("license") = "GPL";
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2018 Facebook */
#include <linux/bpf.h>
#include "bpf_helpers.h"
int _version SEC("version") = 1;
struct ipv_counts {
unsigned int v4;
unsigned int v6;
};
struct bpf_map_def SEC("maps") btf_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(struct ipv_counts),
.max_entries = 4,
};
struct dummy_tracepoint_args {
unsigned long long pad;
struct sock *sock;
};
SEC("dummy_tracepoint")
int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
{
struct ipv_counts *counts;
int key = 0;
if (!arg->sock)
return 0;
counts = bpf_map_lookup_elem(&btf_map, &key);
if (!counts)
return 0;
counts->v6++;
return 0;
}
char _license[] SEC("license") = "GPL";
......@@ -166,6 +166,37 @@ static void test_xdp(void)
bpf_object__close(obj);
}
static void test_xdp_adjust_tail(void)
{
const char *file = "./test_adjust_tail.o";
struct bpf_object *obj;
char buf[128];
__u32 duration, retval, size;
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (err) {
error_cnt++;
return;
}
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
CHECK(err || errno || retval != XDP_DROP,
"ipv4", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
CHECK(err || errno || retval != XDP_TX || size != 54,
"ipv6", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
bpf_object__close(obj);
}
#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
#define VIP_NUM 5
......@@ -1177,6 +1208,7 @@ int main(void)
{
test_pkt_access();
test_xdp();
test_xdp_adjust_tail();
test_l4lb_all();
test_xdp_noinline();
test_tcp_estats();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册