提交 d4069fe6 编写于 作者: D David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2018-03-31

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Add raw BPF tracepoint API in order to have a BPF program type that
   can access kernel internal arguments of the tracepoints in their
   raw form similar to kprobes based BPF programs. This infrastructure
   also adds a new BPF_RAW_TRACEPOINT_OPEN command to BPF syscall which
   returns an anon-inode backed fd for the tracepoint object that allows
   for automatic detach of the BPF program resp. unregistering of the
   tracepoint probe on fd release, from Alexei.

2) Add new BPF cgroup hooks at bind() and connect() entry in order to
   allow BPF programs to reject, inspect or modify user space passed
   struct sockaddr, and as well a hook at post bind time once the port
   has been allocated. They are used in FB's container management engine
   for implementing policy, replacing fragile LD_PRELOAD wrapper
   intercepting bind() and connect() calls that only works in limited
   scenarios like glibc based apps but not for other runtimes in
   containerized applications, from Andrey.

3) BPF_F_INGRESS flag support has been added to sockmap programs for
   their redirect helper call bringing it in line with cls_bpf based
   programs. Support is added for both variants of sockmap programs,
   meaning for tx ULP hooks as well as recv skb hooks, from John.

4) Various improvements on BPF side for the nfp driver, besides others
   this work adds BPF map update and delete helper call support from
   the datapath, JITing of 32 and 64 bit XADD instructions as well as
   offload support of bpf_get_prandom_u32() call. Initial implementation
   of nfp packet cache has been tackled that optimizes memory access
   (see merge commit for further details), from Jakub and Jiong.

5) Removal of struct bpf_verifier_env argument from the print_bpf_insn()
   API has been done in order to prepare to use print_bpf_insn() soon
   out of perf tool directly. This makes the print_bpf_insn() API more
   generic and pushes the env into private data. bpftool is adjusted
   as well with the print_bpf_insn() argument removal, from Jiri.

6) Couple of cleanups and prep work for the upcoming BTF (BPF Type
   Format). The latter will reuse the current BPF verifier log as
   well, thus bpf_verifier_log() is further generalized, from Martin.

7) For bpf_getsockopt() and bpf_setsockopt() helpers, IPv4 IP_TOS read
   and write support has been added in similar fashion to existing
   IPv6 IPV6_TCLASS socket option we already have, from Nikita.

8) Fixes in recent sockmap scatterlist API usage, which did not use
   sg_init_table() for initialization thus triggering a BUG_ON() in
   scatterlist API when CONFIG_DEBUG_SG was enabled. This adds and
   uses a small helper sg_init_marker() to properly handle the affected
   cases, from Prashant.

9) Let the BPF core follow IDR code convention and therefore use the
   idr_preload() and idr_preload_end() helpers, which would also help
   idr_alloc_cyclic() under GFP_ATOMIC to better succeed under memory
   pressure, from Shaohua.

10) Last but not least, a spelling fix in an error message for the
    BPF cookie UID helper under BPF sample code, from Colin.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -1153,7 +1153,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
cinfo.sdma_ring_size = fd->cq->nentries;
cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;
trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, &cinfo);
if (copy_to_user((void __user *)arg, &cinfo, len))
return -EFAULT;
......
......@@ -106,7 +106,7 @@ TRACE_EVENT(hfi1_uctxtdata,
TRACE_EVENT(hfi1_ctxt_info,
TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt,
unsigned int subctxt,
struct hfi1_ctxt_info cinfo),
struct hfi1_ctxt_info *cinfo),
TP_ARGS(dd, ctxt, subctxt, cinfo),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(unsigned int, ctxt)
......@@ -120,11 +120,11 @@ TRACE_EVENT(hfi1_ctxt_info,
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->egrtids = cinfo.egrtids;
__entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
__entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
__entry->sdma_ring_size = cinfo.sdma_ring_size;
__entry->rcvegr_size = cinfo.rcvegr_size;
__entry->egrtids = cinfo->egrtids;
__entry->rcvhdrq_cnt = cinfo->rcvhdrq_cnt;
__entry->rcvhdrq_size = cinfo->rcvhdrq_entsize;
__entry->sdma_ring_size = cinfo->sdma_ring_size;
__entry->rcvegr_size = cinfo->rcvegr_size;
),
TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
__get_str(dev),
......
......@@ -218,17 +218,17 @@ nfp_bpf_cmsg_communicate(struct nfp_app_bpf *bpf, struct sk_buff *skb,
return skb;
hdr = (struct cmsg_hdr *)skb->data;
/* 0 reply_size means caller will do the validation */
if (reply_size && skb->len != reply_size) {
cmsg_warn(bpf, "cmsg drop - wrong size %d != %d!\n",
skb->len, reply_size);
goto err_free;
}
if (hdr->type != __CMSG_REPLY(type)) {
cmsg_warn(bpf, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
hdr->type, __CMSG_REPLY(type));
goto err_free;
}
/* 0 reply_size means caller will do the validation */
if (reply_size && skb->len != reply_size) {
cmsg_warn(bpf, "cmsg drop - type 0x%02x wrong size %d != %d!\n",
type, skb->len, reply_size);
goto err_free;
}
return skb;
err_free:
......
......@@ -41,6 +41,7 @@ enum bpf_cap_tlv_type {
NFP_BPF_CAP_TYPE_FUNC = 1,
NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2,
NFP_BPF_CAP_TYPE_MAPS = 3,
NFP_BPF_CAP_TYPE_RANDOM = 4,
};
struct nfp_bpf_cap_tlv_func {
......
......@@ -284,6 +284,12 @@ nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
case BPF_FUNC_map_lookup_elem:
bpf->helpers.map_lookup = readl(&cap->func_addr);
break;
case BPF_FUNC_map_update_elem:
bpf->helpers.map_update = readl(&cap->func_addr);
break;
case BPF_FUNC_map_delete_elem:
bpf->helpers.map_delete = readl(&cap->func_addr);
break;
}
return 0;
......@@ -309,6 +315,14 @@ nfp_bpf_parse_cap_maps(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
return 0;
}
static int
nfp_bpf_parse_cap_random(struct nfp_app_bpf *bpf, void __iomem *value,
u32 length)
{
bpf->pseudo_random = true;
return 0;
}
static int nfp_bpf_parse_capabilities(struct nfp_app *app)
{
struct nfp_cpp *cpp = app->pf->cpp;
......@@ -347,6 +361,10 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
if (nfp_bpf_parse_cap_maps(app->priv, value, length))
goto err_release_free;
break;
case NFP_BPF_CAP_TYPE_RANDOM:
if (nfp_bpf_parse_cap_random(app->priv, value, length))
goto err_release_free;
break;
default:
nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
break;
......
......@@ -72,6 +72,7 @@ enum nfp_relo_type {
#define BR_OFF_RELO 15000
enum static_regs {
STATIC_REG_IMMA = 20, /* Bank AB */
STATIC_REG_IMM = 21, /* Bank AB */
STATIC_REG_STACK = 22, /* Bank A */
STATIC_REG_PKT_LEN = 22, /* Bank B */
......@@ -91,6 +92,8 @@ enum pkt_vec {
#define pptr_reg(np) pv_ctm_ptr(np)
#define imm_a(np) reg_a(STATIC_REG_IMM)
#define imm_b(np) reg_b(STATIC_REG_IMM)
#define imma_a(np) reg_a(STATIC_REG_IMMA)
#define imma_b(np) reg_b(STATIC_REG_IMMA)
#define imm_both(np) reg_both(STATIC_REG_IMM)
#define NFP_BPF_ABI_FLAGS reg_imm(0)
......@@ -128,6 +131,10 @@ enum pkt_vec {
*
* @helpers: helper addressess for various calls
* @helpers.map_lookup: map lookup helper address
* @helpers.map_update: map update helper address
* @helpers.map_delete: map delete helper address
*
* @pseudo_random: FW initialized the pseudo-random machinery (CSRs)
*/
struct nfp_app_bpf {
struct nfp_app *app;
......@@ -162,7 +169,18 @@ struct nfp_app_bpf {
struct {
u32 map_lookup;
u32 map_update;
u32 map_delete;
} helpers;
bool pseudo_random;
};
enum nfp_bpf_map_use {
NFP_MAP_UNUSED = 0,
NFP_MAP_USE_READ,
NFP_MAP_USE_WRITE,
NFP_MAP_USE_ATOMIC_CNT,
};
/**
......@@ -171,12 +189,14 @@ struct nfp_app_bpf {
* @bpf: back pointer to bpf app private structure
* @tid: table id identifying map on datapath
* @l: link on the nfp_app_bpf->map_list list
* @use_map: map of how the value is used (in 4B chunks)
*/
struct nfp_bpf_map {
struct bpf_offloaded_map *offmap;
struct nfp_app_bpf *bpf;
u32 tid;
struct list_head l;
enum nfp_bpf_map_use use_map[];
};
struct nfp_prog;
......@@ -190,6 +210,16 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
#define nfp_meta_next(meta) list_next_entry(meta, l)
#define nfp_meta_prev(meta) list_prev_entry(meta, l)
/**
* struct nfp_bpf_reg_state - register state for calls
* @reg: BPF register state from latest path
* @var_off: for stack arg - changes stack offset on different paths
*/
struct nfp_bpf_reg_state {
struct bpf_reg_state reg;
bool var_off;
};
#define FLAG_INSN_IS_JUMP_DST BIT(0)
/**
......@@ -199,11 +229,16 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
* @ldst_gather_len: memcpy length gathered from load/store sequence
* @paired_st: the paired store insn at the head of the sequence
* @ptr_not_const: pointer is not always constant
* @pkt_cache: packet data cache information
* @pkt_cache.range_start: start offset for associated packet data cache
* @pkt_cache.range_end: end offset for associated packet data cache
* @pkt_cache.do_init: this read needs to initialize packet data cache
* @xadd_over_16bit: 16bit immediate is not guaranteed
* @xadd_maybe_16bit: 16bit immediate is possible
* @jmp_dst: destination info for jump instructions
* @func_id: function id for call instructions
* @arg1: arg1 for call instructions
* @arg2: arg2 for call instructions
* @arg2_var_off: arg2 changes stack offset on different paths
* @off: index of first generated machine instruction (in nfp_prog.prog)
* @n: eBPF instruction number
* @flags: eBPF instruction extra optimization flags
......@@ -214,18 +249,27 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
struct nfp_insn_meta {
struct bpf_insn insn;
union {
/* pointer ops (ld/st/xadd) */
struct {
struct bpf_reg_state ptr;
struct bpf_insn *paired_st;
s16 ldst_gather_len;
bool ptr_not_const;
struct {
s16 range_start;
s16 range_end;
bool do_init;
} pkt_cache;
bool xadd_over_16bit;
bool xadd_maybe_16bit;
};
/* jump */
struct nfp_insn_meta *jmp_dst;
/* function calls */
struct {
u32 func_id;
struct bpf_reg_state arg1;
struct bpf_reg_state arg2;
bool arg2_var_off;
struct nfp_bpf_reg_state arg2;
};
};
unsigned int off;
......@@ -269,6 +313,41 @@ static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM);
}
static inline bool is_mbpf_load_pkt(const struct nfp_insn_meta *meta)
{
return is_mbpf_load(meta) && meta->ptr.type == PTR_TO_PACKET;
}
static inline bool is_mbpf_store_pkt(const struct nfp_insn_meta *meta)
{
return is_mbpf_store(meta) && meta->ptr.type == PTR_TO_PACKET;
}
static inline bool is_mbpf_classic_load(const struct nfp_insn_meta *meta)
{
u8 code = meta->insn.code;
return BPF_CLASS(code) == BPF_LD &&
(BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND);
}
static inline bool is_mbpf_classic_store(const struct nfp_insn_meta *meta)
{
u8 code = meta->insn.code;
return BPF_CLASS(code) == BPF_ST && BPF_MODE(code) == BPF_MEM;
}
static inline bool is_mbpf_classic_store_pkt(const struct nfp_insn_meta *meta)
{
return is_mbpf_classic_store(meta) && meta->ptr.type == PTR_TO_PACKET;
}
static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
{
return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
}
/**
* struct nfp_prog - nfp BPF program
* @bpf: backpointer to the bpf app priv structure
......
......@@ -164,6 +164,41 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
return 0;
}
/* Atomic engine requires values to be in big endian, we need to byte swap
* the value words used with xadd.
*/
static void nfp_map_bpf_byte_swap(struct nfp_bpf_map *nfp_map, void *value)
{
u32 *word = value;
unsigned int i;
for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++)
if (nfp_map->use_map[i] == NFP_MAP_USE_ATOMIC_CNT)
word[i] = (__force u32)cpu_to_be32(word[i]);
}
static int
nfp_bpf_map_lookup_entry(struct bpf_offloaded_map *offmap,
void *key, void *value)
{
int err;
err = nfp_bpf_ctrl_lookup_entry(offmap, key, value);
if (err)
return err;
nfp_map_bpf_byte_swap(offmap->dev_priv, value);
return 0;
}
static int
nfp_bpf_map_update_entry(struct bpf_offloaded_map *offmap,
void *key, void *value, u64 flags)
{
nfp_map_bpf_byte_swap(offmap->dev_priv, value);
return nfp_bpf_ctrl_update_entry(offmap, key, value, flags);
}
static int
nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap,
void *key, void *next_key)
......@@ -183,8 +218,8 @@ nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
static const struct bpf_map_dev_ops nfp_bpf_map_ops = {
.map_get_next_key = nfp_bpf_map_get_next_key,
.map_lookup_elem = nfp_bpf_ctrl_lookup_entry,
.map_update_elem = nfp_bpf_ctrl_update_entry,
.map_lookup_elem = nfp_bpf_map_lookup_entry,
.map_update_elem = nfp_bpf_map_update_entry,
.map_delete_elem = nfp_bpf_map_delete_elem,
};
......@@ -192,6 +227,7 @@ static int
nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
{
struct nfp_bpf_map *nfp_map;
unsigned int use_map_size;
long long int res;
if (!bpf->maps.types)
......@@ -226,7 +262,10 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
return -ENOMEM;
}
nfp_map = kzalloc(sizeof(*nfp_map), GFP_USER);
use_map_size = DIV_ROUND_UP(offmap->map.value_size, 4) *
FIELD_SIZEOF(struct nfp_bpf_map, use_map[0]);
nfp_map = kzalloc(sizeof(*nfp_map) + use_map_size, GFP_USER);
if (!nfp_map)
return -ENOMEM;
......
......@@ -97,7 +97,7 @@ nfp_record_adjust_head(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
if (nfp_prog->adjust_head_location != meta->n)
goto exit_set_location;
if (meta->arg2.var_off.value != imm)
if (meta->arg2.reg.var_off.value != imm)
goto exit_set_location;
}
......@@ -106,15 +106,70 @@ nfp_record_adjust_head(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
nfp_prog->adjust_head_location = location;
}
static int
nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
struct nfp_bpf_reg_state *old_arg)
{
s64 off, old_off;
if (reg->type != PTR_TO_STACK) {
pr_vlog(env, "%s: unsupported ptr type %d\n",
fname, reg->type);
return false;
}
if (!tnum_is_const(reg->var_off)) {
pr_vlog(env, "%s: variable pointer\n", fname);
return false;
}
off = reg->var_off.value + reg->off;
if (-off % 4) {
pr_vlog(env, "%s: unaligned stack pointer %lld\n", fname, -off);
return false;
}
/* Rest of the checks is only if we re-parse the same insn */
if (!old_arg)
return true;
old_off = old_arg->reg.var_off.value + old_arg->reg.off;
old_arg->var_off |= off != old_off;
return true;
}
static bool
nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env,
struct nfp_insn_meta *meta,
u32 helper_tgt, const struct bpf_reg_state *reg1)
{
if (!helper_tgt) {
pr_vlog(env, "%s: not supported by FW\n", fname);
return false;
}
/* Rest of the checks is only if we re-parse the same insn */
if (!meta->func_id)
return true;
if (meta->arg1.map_ptr != reg1->map_ptr) {
pr_vlog(env, "%s: called for different map\n", fname);
return false;
}
return true;
}
static int
nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
struct nfp_insn_meta *meta)
{
const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1;
const struct bpf_reg_state *reg2 = cur_regs(env) + BPF_REG_2;
const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3;
struct nfp_app_bpf *bpf = nfp_prog->bpf;
u32 func_id = meta->insn.imm;
s64 off, old_off;
switch (func_id) {
case BPF_FUNC_xdp_adjust_head:
......@@ -131,41 +186,36 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
break;
case BPF_FUNC_map_lookup_elem:
if (!bpf->helpers.map_lookup) {
pr_vlog(env, "map_lookup: not supported by FW\n");
if (!nfp_bpf_map_call_ok("map_lookup", env, meta,
bpf->helpers.map_lookup, reg1) ||
!nfp_bpf_stack_arg_ok("map_lookup", env, reg2,
meta->func_id ? &meta->arg2 : NULL))
return -EOPNOTSUPP;
}
if (reg2->type != PTR_TO_STACK) {
pr_vlog(env,
"map_lookup: unsupported key ptr type %d\n",
reg2->type);
return -EOPNOTSUPP;
}
if (!tnum_is_const(reg2->var_off)) {
pr_vlog(env, "map_lookup: variable key pointer\n");
break;
case BPF_FUNC_map_update_elem:
if (!nfp_bpf_map_call_ok("map_update", env, meta,
bpf->helpers.map_update, reg1) ||
!nfp_bpf_stack_arg_ok("map_update", env, reg2,
meta->func_id ? &meta->arg2 : NULL) ||
!nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL))
return -EOPNOTSUPP;
}
break;
off = reg2->var_off.value + reg2->off;
if (-off % 4) {
pr_vlog(env,
"map_lookup: unaligned stack pointer %lld\n",
-off);
case BPF_FUNC_map_delete_elem:
if (!nfp_bpf_map_call_ok("map_delete", env, meta,
bpf->helpers.map_delete, reg1) ||
!nfp_bpf_stack_arg_ok("map_delete", env, reg2,
meta->func_id ? &meta->arg2 : NULL))
return -EOPNOTSUPP;
}
break;
/* Rest of the checks is only if we re-parse the same insn */
if (!meta->func_id)
case BPF_FUNC_get_prandom_u32:
if (bpf->pseudo_random)
break;
pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n");
return -EOPNOTSUPP;
old_off = meta->arg2.var_off.value + meta->arg2.off;
meta->arg2_var_off |= off != old_off;
if (meta->arg1.map_ptr != reg1->map_ptr) {
pr_vlog(env, "map_lookup: called for different map\n");
return -EOPNOTSUPP;
}
break;
default:
pr_vlog(env, "unsupported function id: %d\n", func_id);
return -EOPNOTSUPP;
......@@ -173,7 +223,7 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
meta->func_id = func_id;
meta->arg1 = *reg1;
meta->arg2 = *reg2;
meta->arg2.reg = *reg2;
return 0;
}
......@@ -242,6 +292,72 @@ nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog,
return -EINVAL;
}
static const char *nfp_bpf_map_use_name(enum nfp_bpf_map_use use)
{
static const char * const names[] = {
[NFP_MAP_UNUSED] = "unused",
[NFP_MAP_USE_READ] = "read",
[NFP_MAP_USE_WRITE] = "write",
[NFP_MAP_USE_ATOMIC_CNT] = "atomic",
};
if (use >= ARRAY_SIZE(names) || !names[use])
return "unknown";
return names[use];
}
static int
nfp_bpf_map_mark_used_one(struct bpf_verifier_env *env,
struct nfp_bpf_map *nfp_map,
unsigned int off, enum nfp_bpf_map_use use)
{
if (nfp_map->use_map[off / 4] != NFP_MAP_UNUSED &&
nfp_map->use_map[off / 4] != use) {
pr_vlog(env, "map value use type conflict %s vs %s off: %u\n",
nfp_bpf_map_use_name(nfp_map->use_map[off / 4]),
nfp_bpf_map_use_name(use), off);
return -EOPNOTSUPP;
}
nfp_map->use_map[off / 4] = use;
return 0;
}
static int
nfp_bpf_map_mark_used(struct bpf_verifier_env *env, struct nfp_insn_meta *meta,
const struct bpf_reg_state *reg,
enum nfp_bpf_map_use use)
{
struct bpf_offloaded_map *offmap;
struct nfp_bpf_map *nfp_map;
unsigned int size, off;
int i, err;
if (!tnum_is_const(reg->var_off)) {
pr_vlog(env, "map value offset is variable\n");
return -EOPNOTSUPP;
}
off = reg->var_off.value + meta->insn.off + reg->off;
size = BPF_LDST_BYTES(&meta->insn);
offmap = map_to_offmap(reg->map_ptr);
nfp_map = offmap->dev_priv;
if (off + size > offmap->map.value_size) {
pr_vlog(env, "map value access out-of-bounds\n");
return -EINVAL;
}
for (i = 0; i < size; i += 4 - (off + i) % 4) {
err = nfp_bpf_map_mark_used_one(env, nfp_map, off + i, use);
if (err)
return err;
}
return 0;
}
static int
nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
struct bpf_verifier_env *env, u8 reg_no)
......@@ -264,10 +380,22 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
}
if (reg->type == PTR_TO_MAP_VALUE) {
if (is_mbpf_load(meta)) {
err = nfp_bpf_map_mark_used(env, meta, reg,
NFP_MAP_USE_READ);
if (err)
return err;
}
if (is_mbpf_store(meta)) {
pr_vlog(env, "map writes not supported\n");
return -EOPNOTSUPP;
}
if (is_mbpf_xadd(meta)) {
err = nfp_bpf_map_mark_used(env, meta, reg,
NFP_MAP_USE_ATOMIC_CNT);
if (err)
return err;
}
}
if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) {
......@@ -281,6 +409,31 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
return 0;
}
static int
nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
struct bpf_verifier_env *env)
{
const struct bpf_reg_state *sreg = cur_regs(env) + meta->insn.src_reg;
const struct bpf_reg_state *dreg = cur_regs(env) + meta->insn.dst_reg;
if (dreg->type != PTR_TO_MAP_VALUE) {
pr_vlog(env, "atomic add not to a map value pointer: %d\n",
dreg->type);
return -EOPNOTSUPP;
}
if (sreg->type != SCALAR_VALUE) {
pr_vlog(env, "atomic add not of a scalar: %d\n", sreg->type);
return -EOPNOTSUPP;
}
meta->xadd_over_16bit |=
sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff;
meta->xadd_maybe_16bit |=
(sreg->var_off.value & ~sreg->var_off.mask) <= 0xffff;
return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
}
static int
nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
{
......@@ -313,6 +466,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
if (is_mbpf_store(meta))
return nfp_bpf_check_ptr(nfp_prog, meta, env,
meta->insn.dst_reg);
if (is_mbpf_xadd(meta))
return nfp_bpf_check_xadd(nfp_prog, meta, env);
return 0;
}
......
......@@ -48,6 +48,8 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
[CMD_TGT_READ32_SWAP] = { 0x02, 0x5c },
[CMD_TGT_READ_LE] = { 0x01, 0x40 },
[CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 },
[CMD_TGT_ADD] = { 0x00, 0x47 },
[CMD_TGT_ADD_IMM] = { 0x02, 0x47 },
};
static bool unreg_is_imm(u16 reg)
......
......@@ -39,6 +39,7 @@
#include <linux/types.h>
#define REG_NONE 0
#define REG_WIDTH 4
#define RE_REG_NO_DST 0x020
#define RE_REG_IMM 0x020
......@@ -237,6 +238,8 @@ enum cmd_tgt_map {
CMD_TGT_READ32_SWAP,
CMD_TGT_READ_LE,
CMD_TGT_READ_SWAP_LE,
CMD_TGT_ADD,
CMD_TGT_ADD_IMM,
__CMD_TGT_MAP_SIZE,
};
......@@ -250,9 +253,12 @@ enum cmd_mode {
enum cmd_ctx_swap {
CMD_CTX_SWAP = 0,
CMD_CTX_SWAP_DEFER1 = 1,
CMD_CTX_SWAP_DEFER2 = 2,
CMD_CTX_NO_SWAP = 3,
};
#define CMD_OVE_DATA GENMASK(5, 3)
#define CMD_OVE_LEN BIT(7)
#define CMD_OV_LEN GENMASK(12, 8)
......@@ -278,6 +284,7 @@ enum lcsr_wr_src {
#define NFP_CSR_ACT_LM_ADDR1 0x6c
#define NFP_CSR_ACT_LM_ADDR2 0x94
#define NFP_CSR_ACT_LM_ADDR3 0x9c
#define NFP_CSR_PSEUDO_RND_NUM 0x148
/* Software register representation, independent of operand type */
#define NN_REG_TYPE GENMASK(31, 24)
......
......@@ -1651,12 +1651,7 @@ static void iwl_dump_nic_error_log(struct iwl_priv *priv)
priv->status, table.valid);
}
trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low,
table.data1, table.data2, table.line,
table.blink2, table.ilink1, table.ilink2,
table.bcon_time, table.gp1, table.gp2,
table.gp3, table.ucode_ver, table.hw_ver,
0, table.brd_ver);
trace_iwlwifi_dev_ucode_error(trans->dev, &table, 0, table.brd_ver);
IWL_ERR(priv, "0x%08X | %-28s\n", table.error_id,
desc_lookup(table.error_id));
IWL_ERR(priv, "0x%08X | uPc\n", table.pc);
......
......@@ -126,14 +126,11 @@ TRACE_EVENT(iwlwifi_dev_tx,
__entry->framelen, __entry->skbaddr)
);
struct iwl_error_event_table;
TRACE_EVENT(iwlwifi_dev_ucode_error,
TP_PROTO(const struct device *dev, u32 desc, u32 tsf_low,
u32 data1, u32 data2, u32 line, u32 blink2, u32 ilink1,
u32 ilink2, u32 bcon_time, u32 gp1, u32 gp2, u32 rev_type,
u32 major, u32 minor, u32 hw_ver, u32 brd_ver),
TP_ARGS(dev, desc, tsf_low, data1, data2, line,
blink2, ilink1, ilink2, bcon_time, gp1, gp2,
rev_type, major, minor, hw_ver, brd_ver),
TP_PROTO(const struct device *dev, const struct iwl_error_event_table *table,
u32 hw_ver, u32 brd_ver),
TP_ARGS(dev, table, hw_ver, brd_ver),
TP_STRUCT__entry(
DEV_ENTRY
__field(u32, desc)
......@@ -155,20 +152,20 @@ TRACE_EVENT(iwlwifi_dev_ucode_error,
),
TP_fast_assign(
DEV_ASSIGN;
__entry->desc = desc;
__entry->tsf_low = tsf_low;
__entry->data1 = data1;
__entry->data2 = data2;
__entry->line = line;
__entry->blink2 = blink2;
__entry->ilink1 = ilink1;
__entry->ilink2 = ilink2;
__entry->bcon_time = bcon_time;
__entry->gp1 = gp1;
__entry->gp2 = gp2;
__entry->rev_type = rev_type;
__entry->major = major;
__entry->minor = minor;
__entry->desc = table->error_id;
__entry->tsf_low = table->tsf_low;
__entry->data1 = table->data1;
__entry->data2 = table->data2;
__entry->line = table->line;
__entry->blink2 = table->blink2;
__entry->ilink1 = table->ilink1;
__entry->ilink2 = table->ilink2;
__entry->bcon_time = table->bcon_time;
__entry->gp1 = table->gp1;
__entry->gp2 = table->gp2;
__entry->rev_type = table->gp3;
__entry->major = table->ucode_ver;
__entry->minor = table->hw_ver;
__entry->hw_ver = hw_ver;
__entry->brd_ver = brd_ver;
),
......
......@@ -30,6 +30,7 @@
#ifndef __CHECKER__
#include "iwl-trans.h"
#include "dvm/commands.h"
#define CREATE_TRACE_POINTS
#include "iwl-devtrace.h"
......
......@@ -549,12 +549,7 @@ static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u32 base)
IWL_ERR(mvm, "Loaded firmware version: %s\n", mvm->fw->fw_version);
trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low,
table.data1, table.data2, table.data3,
table.blink2, table.ilink1,
table.ilink2, table.bcon_time, table.gp1,
table.gp2, table.fw_rev_type, table.major,
table.minor, table.hw_ver, table.brd_ver);
trace_iwlwifi_dev_ucode_error(trans->dev, &table, table.hw_ver, table.brd_ver);
IWL_ERR(mvm, "0x%08X | %-28s\n", table.error_id,
desc_lookup(table.error_id));
IWL_ERR(mvm, "0x%08X | trm_hw_status0\n", table.trm_hw_status0);
......
......@@ -34,7 +34,7 @@
#define REG_PR_FMT "%04x=%08x"
#define REG_PR_ARG __entry->reg, __entry->val
DECLARE_EVENT_CLASS(dev_reg_evt,
DECLARE_EVENT_CLASS(dev_reg_evtu,
TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
TP_ARGS(dev, reg, val),
TP_STRUCT__entry(
......@@ -51,12 +51,12 @@ DECLARE_EVENT_CLASS(dev_reg_evt,
)
);
DEFINE_EVENT(dev_reg_evt, reg_read,
DEFINE_EVENT(dev_reg_evtu, reg_read,
TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
TP_ARGS(dev, reg, val)
);
DEFINE_EVENT(dev_reg_evt, reg_write,
DEFINE_EVENT(dev_reg_evtu, reg_write,
TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
TP_ARGS(dev, reg, val)
);
......
......@@ -178,6 +178,15 @@
#define TRACE_SYSCALLS()
#endif
#ifdef CONFIG_BPF_EVENTS
#define BPF_RAW_TP() STRUCT_ALIGN(); \
VMLINUX_SYMBOL(__start__bpf_raw_tp) = .; \
KEEP(*(__bpf_raw_tp_map)) \
VMLINUX_SYMBOL(__stop__bpf_raw_tp) = .;
#else
#define BPF_RAW_TP()
#endif
#ifdef CONFIG_SERIAL_EARLYCON
#define EARLYCON_TABLE() STRUCT_ALIGN(); \
VMLINUX_SYMBOL(__earlycon_table) = .; \
......@@ -249,6 +258,7 @@
LIKELY_PROFILE() \
BRANCH_PROFILE() \
TRACE_PRINTKS() \
BPF_RAW_TP() \
TRACEPOINT_STR()
/*
......
......@@ -6,6 +6,7 @@
#include <uapi/linux/bpf.h>
struct sock;
struct sockaddr;
struct cgroup;
struct sk_buff;
struct bpf_sock_ops_kern;
......@@ -63,6 +64,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
int __cgroup_bpf_run_filter_sk(struct sock *sk,
enum bpf_attach_type type);
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
enum bpf_attach_type type);
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
enum bpf_attach_type type);
......@@ -93,16 +98,64 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
__ret; \
})
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
#define BPF_CGROUP_RUN_SK_PROG(sk, type) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) { \
__ret = __cgroup_bpf_run_filter_sk(sk, \
BPF_CGROUP_INET_SOCK_CREATE); \
__ret = __cgroup_bpf_run_filter_sk(sk, type); \
} \
__ret; \
})
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
__ret; \
})
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) { \
lock_sock(sk); \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
release_sock(sk); \
} \
__ret; \
})
#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
sk->sk_prot->pre_connect)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
int __ret = 0; \
......@@ -132,9 +185,18 @@ struct cgroup_bpf {};
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
......
......@@ -208,12 +208,15 @@ struct bpf_prog_ops {
struct bpf_verifier_ops {
/* return eBPF function prototype for verification */
const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
const struct bpf_func_proto *
(*get_func_proto)(enum bpf_func_id func_id,
const struct bpf_prog *prog);
/* return true if 'size' wide access at offset 'off' within bpf_context
* with 'type' (read or write) is allowed
*/
bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
const struct bpf_prog *prog);
......
......@@ -8,6 +8,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
......@@ -19,6 +20,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
......
......@@ -153,7 +153,7 @@ struct bpf_insn_aux_data {
#define BPF_VERIFIER_TMP_LOG_SIZE 1024
struct bpf_verifer_log {
struct bpf_verifier_log {
u32 level;
char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
char __user *ubuf;
......@@ -161,11 +161,16 @@ struct bpf_verifer_log {
u32 len_total;
};
static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log)
static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
{
return log->len_used >= log->len_total - 1;
}
static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
return log->level && log->ubuf && !bpf_verifier_log_full(log);
}
#define BPF_MAX_SUBPROGS 256
/* single container for all structs
......@@ -185,13 +190,15 @@ struct bpf_verifier_env {
bool allow_ptr_leaks;
bool seen_direct_write;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
struct bpf_verifer_log log;
struct bpf_verifier_log log;
u32 subprog_starts[BPF_MAX_SUBPROGS];
/* computes the stack depth of each bpf function */
u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1];
u32 subprog_cnt;
};
void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
va_list args);
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...);
......
......@@ -372,7 +372,7 @@ struct xdp_rxq_info;
#define BPF_LDST_BYTES(insn) \
({ \
const int __size = bpf_size_to_bytes(BPF_SIZE(insn->code)); \
const int __size = bpf_size_to_bytes(BPF_SIZE((insn)->code)); \
WARN_ON(__size < 0); \
__size; \
})
......@@ -469,6 +469,7 @@ struct bpf_prog {
is_func:1, /* program is a bpf function */
kprobe_override:1; /* Do we override a kprobe? */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */
u8 tag[BPF_TAG_SIZE];
......@@ -521,6 +522,8 @@ struct sk_msg_buff {
__u32 key;
__u32 flags;
struct bpf_map *map;
struct sk_buff *skb;
struct list_head list;
};
/* Compute the linear packet data range [data, data_end) which
......@@ -1018,6 +1021,16 @@ static inline int bpf_tell_extensions(void)
return SKF_AD_MAX;
}
struct bpf_sock_addr_kern {
struct sock *sk;
struct sockaddr *uaddr;
/* Temporary "register" to make indirect stores to nested structures
* defined above. We need three registers to make such a store, but
* only two (src and dst) are available at convert_ctx_access time
*/
u64 tmp_reg;
};
struct bpf_sock_ops_kern {
struct sock *sk;
u32 op;
......
......@@ -919,6 +919,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
#define swap(a, b) \
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
/* This counts to 12. Any more, it will return 13th argument. */
#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#define __CONCAT(a, b) a ## b
#define CONCATENATE(a, b) __CONCAT(a, b)
/**
* container_of - cast a member of a structure out to the containing structure
* @ptr: the pointer to the member.
......
......@@ -248,6 +248,24 @@ static inline void *sg_virt(struct scatterlist *sg)
return page_address(sg_page(sg)) + sg->offset;
}
/**
* sg_init_marker - Initialize markers in sg table
* @sgl: The SG table
* @nents: Number of entries in table
*
**/
static inline void sg_init_marker(struct scatterlist *sgl,
unsigned int nents)
{
#ifdef CONFIG_DEBUG_SG
unsigned int i;
for (i = 0; i < nents; i++)
sgl[i].sg_magic = SG_MAGIC;
#endif
sg_mark_end(&sgl[nents - 1]);
}
int sg_nents(struct scatterlist *sg);
int sg_nents_for_len(struct scatterlist *sg, u64 len);
struct scatterlist *sg_next(struct scatterlist *);
......
......@@ -468,6 +468,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
void perf_event_detach_bpf_prog(struct perf_event *event);
int perf_event_query_prog_array(struct perf_event *event, void __user *info);
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
......@@ -487,6 +490,18 @@ perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
return -EOPNOTSUPP;
}
static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *p)
{
return -EOPNOTSUPP;
}
static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *p)
{
return -EOPNOTSUPP;
}
static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
{
return NULL;
}
#endif
enum {
......@@ -546,6 +561,33 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
void perf_trace_buf_update(void *record, u16 type);
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3);
void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4);
void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5);
void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6);
void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
u64 arg8);
void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
u64 arg8, u64 arg9);
void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
u64 arg8, u64 arg9, u64 arg10);
void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
u64 arg8, u64 arg9, u64 arg10, u64 arg11);
void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
struct trace_event_call *call, u64 count,
struct pt_regs *regs, struct hlist_head *head,
......
......@@ -35,4 +35,10 @@ struct tracepoint {
struct tracepoint_func __rcu *funcs;
};
struct bpf_raw_event_map {
struct tracepoint *tp;
void *bpf_func;
u32 num_args;
} __aligned(32);
#endif
......@@ -231,6 +231,13 @@ struct ipv6_stub {
};
extern const struct ipv6_stub *ipv6_stub __read_mostly;
/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
struct ipv6_bpf_stub {
int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
bool force_bind_address_no_port, bool with_lock);
};
extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
/*
* identify MLD packets for MLD filter exceptions
*/
......
......@@ -32,6 +32,8 @@ int inet_shutdown(struct socket *sock, int how);
int inet_listen(struct socket *sock, int backlog);
void inet_sock_destruct(struct sock *sk);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
bool force_bind_address_no_port, bool with_lock);
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
......
......@@ -1043,6 +1043,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
int inet6_release(struct socket *sock);
int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len,
bool force_bind_address_no_port, bool with_lock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
......
......@@ -1026,6 +1026,9 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size)
struct proto {
void (*close)(struct sock *sk,
long timeout);
int (*pre_connect)(struct sock *sk,
struct sockaddr *uaddr,
int addr_len);
int (*connect)(struct sock *sk,
struct sockaddr *uaddr,
int addr_len);
......@@ -1085,6 +1088,7 @@ struct proto {
#endif
bool (*stream_memory_free)(const struct sock *sk);
bool (*stream_memory_read)(const struct sock *sk);
/* Memory pressure */
void (*enter_memory_pressure)(struct sock *sk);
void (*leave_memory_pressure)(struct sock *sk);
......
......@@ -273,6 +273,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
int udp_rcv(struct sk_buff *skb);
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int udp_init_sock(struct sock *sk);
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int __udp_disconnect(struct sock *sk, int flags);
int udp_disconnect(struct sock *sk, int flags);
__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
......
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM_VAR
#ifdef CONFIG_BPF_EVENTS
#undef __entry
#define __entry entry
#undef __get_dynamic_array
#define __get_dynamic_array(field) \
((void *)__entry + (__entry->__data_loc_##field & 0xffff))
#undef __get_dynamic_array_len
#define __get_dynamic_array_len(field) \
((__entry->__data_loc_##field >> 16) & 0xffff)
#undef __get_str
#define __get_str(field) ((char *)__get_dynamic_array(field))
#undef __get_bitmask
#define __get_bitmask(field) (char *)__get_dynamic_array(field)
#undef __perf_count
#define __perf_count(c) (c)
#undef __perf_task
#define __perf_task(t) (t)
/* cast any integer, pointer, or small struct to u64 */
#define UINTTYPE(size) \
__typeof__(__builtin_choose_expr(size == 1, (u8)1, \
__builtin_choose_expr(size == 2, (u16)2, \
__builtin_choose_expr(size == 4, (u32)3, \
__builtin_choose_expr(size == 8, (u64)4, \
(void)5)))))
#define __CAST_TO_U64(x) ({ \
typeof(x) __src = (x); \
UINTTYPE(sizeof(x)) __dst; \
memcpy(&__dst, &__src, sizeof(__dst)); \
(u64)__dst; })
#define __CAST1(a,...) __CAST_TO_U64(a)
#define __CAST2(a,...) __CAST_TO_U64(a), __CAST1(__VA_ARGS__)
#define __CAST3(a,...) __CAST_TO_U64(a), __CAST2(__VA_ARGS__)
#define __CAST4(a,...) __CAST_TO_U64(a), __CAST3(__VA_ARGS__)
#define __CAST5(a,...) __CAST_TO_U64(a), __CAST4(__VA_ARGS__)
#define __CAST6(a,...) __CAST_TO_U64(a), __CAST5(__VA_ARGS__)
#define __CAST7(a,...) __CAST_TO_U64(a), __CAST6(__VA_ARGS__)
#define __CAST8(a,...) __CAST_TO_U64(a), __CAST7(__VA_ARGS__)
#define __CAST9(a,...) __CAST_TO_U64(a), __CAST8(__VA_ARGS__)
#define __CAST10(a,...) __CAST_TO_U64(a), __CAST9(__VA_ARGS__)
#define __CAST11(a,...) __CAST_TO_U64(a), __CAST10(__VA_ARGS__)
#define __CAST12(a,...) __CAST_TO_U64(a), __CAST11(__VA_ARGS__)
/* tracepoints with more than 12 arguments will hit build error */
#define CAST_TO_U64(...) CONCATENATE(__CAST, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
static notrace void \
__bpf_trace_##call(void *__data, proto) \
{ \
struct bpf_prog *prog = __data; \
CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(prog, CAST_TO_U64(args)); \
}
/*
* This part is compiled out, it is only here as a build time check
* to make sure that if the tracepoint handling changes, the
* bpf probe will fail to compile unless it too is updated.
*/
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, call, proto, args) \
static inline void bpf_test_probe_##call(void) \
{ \
check_trace_callback_type_##call(__bpf_trace_##template); \
} \
static struct bpf_raw_event_map __used \
__attribute__((section("__bpf_raw_tp_map"))) \
__bpf_trace_tp_map_##call = { \
.tp = &__tracepoint_##call, \
.bpf_func = (void *)__bpf_trace_##template, \
.num_args = COUNT_ARGS(args), \
};
#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
#endif /* CONFIG_BPF_EVENTS */
......@@ -95,6 +95,7 @@
#ifdef TRACEPOINTS_ENABLED
#include <trace/trace_events.h>
#include <trace/perf.h>
#include <trace/bpf_probe.h>
#endif
#undef TRACE_EVENT
......
......@@ -491,7 +491,7 @@ DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node,
TRACE_EVENT(f2fs_truncate_partial_nodes,
TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err),
TP_PROTO(struct inode *inode, nid_t *nid, int depth, int err),
TP_ARGS(inode, nid, depth, err),
......
......@@ -94,6 +94,7 @@ enum bpf_cmd {
BPF_MAP_GET_FD_BY_ID,
BPF_OBJ_GET_INFO_BY_FD,
BPF_PROG_QUERY,
BPF_RAW_TRACEPOINT_OPEN,
};
enum bpf_map_type {
......@@ -134,6 +135,8 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_CGROUP_DEVICE,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
};
enum bpf_attach_type {
......@@ -145,6 +148,12 @@ enum bpf_attach_type {
BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_DEVICE,
BPF_SK_MSG_VERDICT,
BPF_CGROUP_INET4_BIND,
BPF_CGROUP_INET6_BIND,
BPF_CGROUP_INET4_CONNECT,
BPF_CGROUP_INET6_CONNECT,
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
__MAX_BPF_ATTACH_TYPE
};
......@@ -294,6 +303,11 @@ union bpf_attr {
__u32 prog_flags;
char prog_name[BPF_OBJ_NAME_LEN];
__u32 prog_ifindex; /* ifindex of netdev to prep for */
/* For some prog types expected attach type must be known at
* load time to verify attach type specific parts of prog
* (context accesses, allowed helpers, etc).
*/
__u32 expected_attach_type;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
......@@ -344,6 +358,11 @@ union bpf_attr {
__aligned_u64 prog_ids;
__u32 prog_cnt;
} query;
struct {
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
......@@ -729,6 +748,13 @@ union bpf_attr {
* @flags: reserved for future use
* Return: SK_PASS
*
* int bpf_bind(ctx, addr, addr_len)
* Bind socket to address. Only binding to IP is supported, no port can be
* set in addr.
* @ctx: pointer to context of type bpf_sock_addr
* @addr: pointer to struct sockaddr to bind socket to
* @addr_len: length of sockaddr structure
* Return: 0 on success or negative error code
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -794,7 +820,8 @@ union bpf_attr {
FN(msg_redirect_map), \
FN(msg_apply_bytes), \
FN(msg_cork_bytes), \
FN(msg_pull_data),
FN(msg_pull_data), \
FN(bind),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......@@ -923,6 +950,15 @@ struct bpf_sock {
__u32 protocol;
__u32 mark;
__u32 priority;
__u32 src_ip4; /* Allows 1,2,4-byte read.
* Stored in network byte order.
*/
__u32 src_ip6[4]; /* Allows 1,2,4-byte read.
* Stored in network byte order.
*/
__u32 src_port; /* Allows 4-byte read.
* Stored in host byte order
*/
};
#define XDP_PACKET_HEADROOM 256
......@@ -998,6 +1034,26 @@ struct bpf_map_info {
__u64 netns_ino;
} __attribute__((aligned(8)));
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
* by user and intended to be used by socket (e.g. to bind to, depends on
* attach attach type).
*/
struct bpf_sock_addr {
__u32 user_family; /* Allows 4-byte read, but no write. */
__u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
* Stored in network byte order.
*/
__u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
* Stored in network byte order.
*/
__u32 user_port; /* Allows 4-byte read and write.
* Stored in network byte order
*/
__u32 family; /* Allows 4-byte read, but no write */
__u32 type; /* Allows 4-byte read, but no write */
__u32 protocol; /* Allows 4-byte read, but no write */
};
/* User bpf_sock_ops struct to access socket values and specify request ops
* and their replies.
* Some of this fields are in network (bigendian) byte order and may need
......@@ -1152,4 +1208,8 @@ struct bpf_cgroup_dev_ctx {
__u32 minor;
};
struct bpf_raw_tracepoint_args {
__u64 args[0];
};
#endif /* _UAPI__LINUX_BPF_H__ */
......@@ -494,6 +494,42 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
/**
* __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
* provided by user sockaddr
* @sk: sock struct that will use sockaddr
* @uaddr: sockaddr struct provided by user
* @type: The type of program to be exectuted
*
* socket is expected to be of type INET or INET6.
*
* This function will return %-EPERM if an attached program is found and
* returned value != 1 during execution. In all other cases, 0 is returned.
*/
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
enum bpf_attach_type type)
{
struct bpf_sock_addr_kern ctx = {
.sk = sk,
.uaddr = uaddr,
};
struct cgroup *cgrp;
int ret;
/* Check socket family since not all sockets represent network
* endpoint (e.g. AF_UNIX).
*/
if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
return 0;
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
/**
* __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
* @sk: socket to get cgroup from
......@@ -545,7 +581,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id)
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
......@@ -566,6 +602,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id)
static bool cgroup_dev_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
......
......@@ -113,16 +113,16 @@ static const char *const bpf_jmp_string[16] = {
};
static void print_bpf_end_insn(bpf_insn_print_t verbose,
struct bpf_verifier_env *env,
void *private_data,
const struct bpf_insn *insn)
{
verbose(env, "(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg,
verbose(private_data, "(%02x) r%d = %s%d r%d\n",
insn->code, insn->dst_reg,
BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le",
insn->imm, insn->dst_reg);
}
void print_bpf_insn(const struct bpf_insn_cbs *cbs,
struct bpf_verifier_env *env,
const struct bpf_insn *insn,
bool allow_ptr_leaks)
{
......@@ -132,23 +132,23 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
if (class == BPF_ALU || class == BPF_ALU64) {
if (BPF_OP(insn->code) == BPF_END) {
if (class == BPF_ALU64)
verbose(env, "BUG_alu64_%02x\n", insn->code);
verbose(cbs->private_data, "BUG_alu64_%02x\n", insn->code);
else
print_bpf_end_insn(verbose, env, insn);
print_bpf_end_insn(verbose, cbs->private_data, insn);
} else if (BPF_OP(insn->code) == BPF_NEG) {
verbose(env, "(%02x) r%d = %s-r%d\n",
verbose(cbs->private_data, "(%02x) r%d = %s-r%d\n",
insn->code, insn->dst_reg,
class == BPF_ALU ? "(u32) " : "",
insn->dst_reg);
} else if (BPF_SRC(insn->code) == BPF_X) {
verbose(env, "(%02x) %sr%d %s %sr%d\n",
verbose(cbs->private_data, "(%02x) %sr%d %s %sr%d\n",
insn->code, class == BPF_ALU ? "(u32) " : "",
insn->dst_reg,
bpf_alu_string[BPF_OP(insn->code) >> 4],
class == BPF_ALU ? "(u32) " : "",
insn->src_reg);
} else {
verbose(env, "(%02x) %sr%d %s %s%d\n",
verbose(cbs->private_data, "(%02x) %sr%d %s %s%d\n",
insn->code, class == BPF_ALU ? "(u32) " : "",
insn->dst_reg,
bpf_alu_string[BPF_OP(insn->code) >> 4],
......@@ -157,46 +157,46 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
}
} else if (class == BPF_STX) {
if (BPF_MODE(insn->code) == BPF_MEM)
verbose(env, "(%02x) *(%s *)(r%d %+d) = r%d\n",
verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = r%d\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg,
insn->off, insn->src_reg);
else if (BPF_MODE(insn->code) == BPF_XADD)
verbose(env, "(%02x) lock *(%s *)(r%d %+d) += r%d\n",
verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) += r%d\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg, insn->off,
insn->src_reg);
else
verbose(env, "BUG_%02x\n", insn->code);
verbose(cbs->private_data, "BUG_%02x\n", insn->code);
} else if (class == BPF_ST) {
if (BPF_MODE(insn->code) != BPF_MEM) {
verbose(env, "BUG_st_%02x\n", insn->code);
verbose(cbs->private_data, "BUG_st_%02x\n", insn->code);
return;
}
verbose(env, "(%02x) *(%s *)(r%d %+d) = %d\n",
verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg,
insn->off, insn->imm);
} else if (class == BPF_LDX) {
if (BPF_MODE(insn->code) != BPF_MEM) {
verbose(env, "BUG_ldx_%02x\n", insn->code);
verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code);
return;
}
verbose(env, "(%02x) r%d = *(%s *)(r%d %+d)\n",
verbose(cbs->private_data, "(%02x) r%d = *(%s *)(r%d %+d)\n",
insn->code, insn->dst_reg,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->src_reg, insn->off);
} else if (class == BPF_LD) {
if (BPF_MODE(insn->code) == BPF_ABS) {
verbose(env, "(%02x) r0 = *(%s *)skb[%d]\n",
verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[%d]\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->imm);
} else if (BPF_MODE(insn->code) == BPF_IND) {
verbose(env, "(%02x) r0 = *(%s *)skb[r%d + %d]\n",
verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[r%d + %d]\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->src_reg, insn->imm);
......@@ -212,12 +212,12 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
if (map_ptr && !allow_ptr_leaks)
imm = 0;
verbose(env, "(%02x) r%d = %s\n",
verbose(cbs->private_data, "(%02x) r%d = %s\n",
insn->code, insn->dst_reg,
__func_imm_name(cbs, insn, imm,
tmp, sizeof(tmp)));
} else {
verbose(env, "BUG_ld_%02x\n", insn->code);
verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code);
return;
}
} else if (class == BPF_JMP) {
......@@ -227,35 +227,35 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
char tmp[64];
if (insn->src_reg == BPF_PSEUDO_CALL) {
verbose(env, "(%02x) call pc%s\n",
verbose(cbs->private_data, "(%02x) call pc%s\n",
insn->code,
__func_get_name(cbs, insn,
tmp, sizeof(tmp)));
} else {
strcpy(tmp, "unknown");
verbose(env, "(%02x) call %s#%d\n", insn->code,
verbose(cbs->private_data, "(%02x) call %s#%d\n", insn->code,
__func_get_name(cbs, insn,
tmp, sizeof(tmp)),
insn->imm);
}
} else if (insn->code == (BPF_JMP | BPF_JA)) {
verbose(env, "(%02x) goto pc%+d\n",
verbose(cbs->private_data, "(%02x) goto pc%+d\n",
insn->code, insn->off);
} else if (insn->code == (BPF_JMP | BPF_EXIT)) {
verbose(env, "(%02x) exit\n", insn->code);
verbose(cbs->private_data, "(%02x) exit\n", insn->code);
} else if (BPF_SRC(insn->code) == BPF_X) {
verbose(env, "(%02x) if r%d %s r%d goto pc%+d\n",
verbose(cbs->private_data, "(%02x) if r%d %s r%d goto pc%+d\n",
insn->code, insn->dst_reg,
bpf_jmp_string[BPF_OP(insn->code) >> 4],
insn->src_reg, insn->off);
} else {
verbose(env, "(%02x) if r%d %s 0x%x goto pc%+d\n",
verbose(cbs->private_data, "(%02x) if r%d %s 0x%x goto pc%+d\n",
insn->code, insn->dst_reg,
bpf_jmp_string[BPF_OP(insn->code) >> 4],
insn->imm, insn->off);
}
} else {
verbose(env, "(%02x) %s\n",
verbose(cbs->private_data, "(%02x) %s\n",
insn->code, bpf_class_string[class]);
}
}
......@@ -22,14 +22,12 @@
#include <string.h>
#endif
struct bpf_verifier_env;
extern const char *const bpf_alu_string[16];
extern const char *const bpf_class_string[8];
const char *func_id_name(int id);
typedef __printf(2, 3) void (*bpf_insn_print_t)(struct bpf_verifier_env *env,
typedef __printf(2, 3) void (*bpf_insn_print_t)(void *private_data,
const char *, ...);
typedef const char *(*bpf_insn_revmap_call_t)(void *private_data,
const struct bpf_insn *insn);
......@@ -45,7 +43,6 @@ struct bpf_insn_cbs {
};
void print_bpf_insn(const struct bpf_insn_cbs *cbs,
struct bpf_verifier_env *env,
const struct bpf_insn *insn,
bool allow_ptr_leaks);
#endif
......@@ -41,6 +41,8 @@
#include <linux/mm.h>
#include <net/strparser.h>
#include <net/tcp.h>
#include <linux/ptr_ring.h>
#include <net/inet_common.h>
#define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
......@@ -82,6 +84,7 @@ struct smap_psock {
int sg_size;
int eval;
struct sk_msg_buff *cork;
struct list_head ingress;
struct strparser strp;
struct bpf_prog *bpf_tx_msg;
......@@ -103,6 +106,8 @@ struct smap_psock {
};
static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
......@@ -112,6 +117,21 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
return rcu_dereference_sk_user_data(sk);
}
static bool bpf_tcp_stream_read(const struct sock *sk)
{
struct smap_psock *psock;
bool empty = true;
rcu_read_lock();
psock = smap_psock_sk(sk);
if (unlikely(!psock))
goto out;
empty = list_empty(&psock->ingress);
out:
rcu_read_unlock();
return !empty;
}
static struct proto tcp_bpf_proto;
static int bpf_tcp_init(struct sock *sk)
{
......@@ -135,6 +155,8 @@ static int bpf_tcp_init(struct sock *sk)
if (psock->bpf_tx_msg) {
tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
}
sk->sk_prot = &tcp_bpf_proto;
......@@ -170,6 +192,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
{
void (*close_fun)(struct sock *sk, long timeout);
struct smap_psock_map_entry *e, *tmp;
struct sk_msg_buff *md, *mtmp;
struct smap_psock *psock;
struct sock *osk;
......@@ -188,6 +211,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
close_fun = psock->save_close;
write_lock_bh(&sk->sk_callback_lock);
list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
list_del(&md->list);
free_start_sg(psock->sock, md);
kfree(md);
}
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
osk = cmpxchg(e->entry, sk, NULL);
if (osk == sk) {
......@@ -312,7 +341,7 @@ static int bpf_tcp_push(struct sock *sk, int apply_bytes,
md->sg_start++;
if (md->sg_start == MAX_SKB_FRAGS)
md->sg_start = 0;
memset(sg, 0, sizeof(*sg));
sg_init_table(sg, 1);
if (md->sg_start == md->sg_end)
break;
......@@ -468,6 +497,72 @@ static unsigned int smap_do_tx_msg(struct sock *sk,
return _rc;
}
static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
struct smap_psock *psock,
struct sk_msg_buff *md, int flags)
{
bool apply = apply_bytes;
size_t size, copied = 0;
struct sk_msg_buff *r;
int err = 0, i;
r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_KERNEL);
if (unlikely(!r))
return -ENOMEM;
lock_sock(sk);
r->sg_start = md->sg_start;
i = md->sg_start;
do {
r->sg_data[i] = md->sg_data[i];
size = (apply && apply_bytes < md->sg_data[i].length) ?
apply_bytes : md->sg_data[i].length;
if (!sk_wmem_schedule(sk, size)) {
if (!copied)
err = -ENOMEM;
break;
}
sk_mem_charge(sk, size);
r->sg_data[i].length = size;
md->sg_data[i].length -= size;
md->sg_data[i].offset += size;
copied += size;
if (md->sg_data[i].length) {
get_page(sg_page(&r->sg_data[i]));
r->sg_end = (i + 1) == MAX_SKB_FRAGS ? 0 : i + 1;
} else {
i++;
if (i == MAX_SKB_FRAGS)
i = 0;
r->sg_end = i;
}
if (apply) {
apply_bytes -= size;
if (!apply_bytes)
break;
}
} while (i != md->sg_end);
md->sg_start = i;
if (!err) {
list_add_tail(&r->list, &psock->ingress);
sk->sk_data_ready(sk);
} else {
free_start_sg(sk, r);
kfree(r);
}
release_sock(sk);
return err;
}
static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
struct sk_msg_buff *md,
int flags)
......@@ -475,6 +570,7 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
struct smap_psock *psock;
struct scatterlist *sg;
int i, err, free = 0;
bool ingress = !!(md->flags & BPF_F_INGRESS);
sg = md->sg_data;
......@@ -487,9 +583,14 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
goto out_rcu;
rcu_read_unlock();
lock_sock(sk);
err = bpf_tcp_push(sk, send, md, flags, false);
release_sock(sk);
if (ingress) {
err = bpf_tcp_ingress(sk, send, psock, md, flags);
} else {
lock_sock(sk);
err = bpf_tcp_push(sk, send, md, flags, false);
release_sock(sk);
}
smap_release_sock(psock, sk);
if (unlikely(err))
goto out;
......@@ -623,6 +724,92 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock,
return err;
}
static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
struct iov_iter *iter = &msg->msg_iter;
struct smap_psock *psock;
int copied = 0;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
rcu_read_lock();
psock = smap_psock_sk(sk);
if (unlikely(!psock))
goto out;
if (unlikely(!refcount_inc_not_zero(&psock->refcnt)))
goto out;
rcu_read_unlock();
if (!skb_queue_empty(&sk->sk_receive_queue))
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
lock_sock(sk);
while (copied != len) {
struct scatterlist *sg;
struct sk_msg_buff *md;
int i;
md = list_first_entry_or_null(&psock->ingress,
struct sk_msg_buff, list);
if (unlikely(!md))
break;
i = md->sg_start;
do {
struct page *page;
int n, copy;
sg = &md->sg_data[i];
copy = sg->length;
page = sg_page(sg);
if (copied + copy > len)
copy = len - copied;
n = copy_page_to_iter(page, sg->offset, copy, iter);
if (n != copy) {
md->sg_start = i;
release_sock(sk);
smap_release_sock(psock, sk);
return -EFAULT;
}
copied += copy;
sg->offset += copy;
sg->length -= copy;
sk_mem_uncharge(sk, copy);
if (!sg->length) {
i++;
if (i == MAX_SKB_FRAGS)
i = 0;
if (!md->skb)
put_page(page);
}
if (copied == len)
break;
} while (i != md->sg_end);
md->sg_start = i;
if (!sg->length && md->sg_start == md->sg_end) {
list_del(&md->list);
if (md->skb)
consume_skb(md->skb);
kfree(md);
}
}
release_sock(sk);
smap_release_sock(psock, sk);
return copied;
out:
rcu_read_unlock();
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
}
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
......@@ -656,7 +843,7 @@ static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
}
sg = md.sg_data;
sg_init_table(sg, MAX_SKB_FRAGS);
sg_init_marker(sg, MAX_SKB_FRAGS);
rcu_read_unlock();
lock_sock(sk);
......@@ -763,10 +950,14 @@ static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
lock_sock(sk);
if (psock->cork_bytes)
if (psock->cork_bytes) {
m = psock->cork;
else
sg = &m->sg_data[m->sg_end];
} else {
m = &md;
sg = m->sg_data;
sg_init_marker(sg, MAX_SKB_FRAGS);
}
/* Catch case where ring is full and sendpage is stalled. */
if (unlikely(m->sg_end == m->sg_start &&
......@@ -774,7 +965,6 @@ static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
goto out_err;
psock->sg_size += size;
sg = &m->sg_data[m->sg_end];
sg_set_page(sg, page, size, offset);
get_page(page);
m->sg_copy[m->sg_end] = true;
......@@ -861,27 +1051,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
__SK_DROP;
}
static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
{
struct sock *sk = psock->sock;
int copied = 0, num_sg;
struct sk_msg_buff *r;
r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
if (unlikely(!r))
return -EAGAIN;
if (!sk_rmem_schedule(sk, skb, skb->len)) {
kfree(r);
return -EAGAIN;
}
sg_init_table(r->sg_data, MAX_SKB_FRAGS);
num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
if (unlikely(num_sg < 0)) {
kfree(r);
return num_sg;
}
sk_mem_charge(sk, skb->len);
copied = skb->len;
r->sg_start = 0;
r->sg_end = num_sg == MAX_SKB_FRAGS ? 0 : num_sg;
r->skb = skb;
list_add_tail(&r->list, &psock->ingress);
sk->sk_data_ready(sk);
return copied;
}
static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
{
struct smap_psock *peer;
struct sock *sk;
__u32 in;
int rc;
rc = smap_verdict_func(psock, skb);
switch (rc) {
case __SK_REDIRECT:
sk = do_sk_redirect_map(skb);
if (likely(sk)) {
struct smap_psock *peer = smap_psock_sk(sk);
if (likely(peer &&
test_bit(SMAP_TX_RUNNING, &peer->state) &&
!sock_flag(sk, SOCK_DEAD) &&
sock_writeable(sk))) {
skb_set_owner_w(skb, sk);
skb_queue_tail(&peer->rxqueue, skb);
schedule_work(&peer->tx_work);
break;
}
if (!sk) {
kfree_skb(skb);
break;
}
peer = smap_psock_sk(sk);
in = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
if (unlikely(!peer || sock_flag(sk, SOCK_DEAD) ||
!test_bit(SMAP_TX_RUNNING, &peer->state))) {
kfree_skb(skb);
break;
}
if (!in && sock_writeable(sk)) {
skb_set_owner_w(skb, sk);
skb_queue_tail(&peer->rxqueue, skb);
schedule_work(&peer->tx_work);
break;
} else if (in &&
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
skb_queue_tail(&peer->rxqueue, skb);
schedule_work(&peer->tx_work);
break;
}
/* Fall through and free skb otherwise */
case __SK_DROP:
......@@ -943,15 +1178,23 @@ static void smap_tx_work(struct work_struct *w)
}
while ((skb = skb_dequeue(&psock->rxqueue))) {
__u32 flags;
rem = skb->len;
off = 0;
start:
flags = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
do {
if (likely(psock->sock->sk_socket))
n = skb_send_sock_locked(psock->sock,
skb, off, rem);
else
if (likely(psock->sock->sk_socket)) {
if (flags)
n = smap_do_ingress(psock, skb);
else
n = skb_send_sock_locked(psock->sock,
skb, off, rem);
} else {
n = -EINVAL;
}
if (n <= 0) {
if (n == -EAGAIN) {
/* Retry when space is available */
......@@ -969,7 +1212,9 @@ static void smap_tx_work(struct work_struct *w)
rem -= n;
off += n;
} while (rem);
kfree_skb(skb);
if (!flags)
kfree_skb(skb);
}
out:
release_sock(psock->sock);
......@@ -1107,6 +1352,7 @@ static void sock_map_remove_complete(struct bpf_stab *stab)
static void smap_gc_work(struct work_struct *w)
{
struct smap_psock_map_entry *e, *tmp;
struct sk_msg_buff *md, *mtmp;
struct smap_psock *psock;
psock = container_of(w, struct smap_psock, gc_work);
......@@ -1131,6 +1377,12 @@ static void smap_gc_work(struct work_struct *w)
kfree(psock->cork);
}
list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
list_del(&md->list);
free_start_sg(psock->sock, md);
kfree(md);
}
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
list_del(&e->list);
kfree(e);
......@@ -1160,6 +1412,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
INIT_WORK(&psock->tx_work, smap_tx_work);
INIT_WORK(&psock->gc_work, smap_gc_work);
INIT_LIST_HEAD(&psock->maps);
INIT_LIST_HEAD(&psock->ingress);
refcount_set(&psock->refcnt, 1);
rcu_assign_sk_user_data(sock, psock);
......
......@@ -203,11 +203,13 @@ static int bpf_map_alloc_id(struct bpf_map *map)
{
int id;
idr_preload(GFP_KERNEL);
spin_lock_bh(&map_idr_lock);
id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
if (id > 0)
map->id = id;
spin_unlock_bh(&map_idr_lock);
idr_preload_end();
if (WARN_ON_ONCE(!id))
return -ENOSPC;
......@@ -940,11 +942,13 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
int id;
idr_preload(GFP_KERNEL);
spin_lock_bh(&prog_idr_lock);
id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
if (id > 0)
prog->aux->id = id;
spin_unlock_bh(&prog_idr_lock);
idr_preload_end();
/* id is in [1, INT_MAX) */
if (WARN_ON_ONCE(!id))
......@@ -1167,8 +1171,75 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
/* Initially all BPF programs could be loaded w/o specifying
* expected_attach_type. Later for some of them specifying expected_attach_type
* at load time became required so that program could be validated properly.
* Programs of types that are allowed to be loaded both w/ and w/o (for
* backward compatibility) expected_attach_type, should have the default attach
* type assigned to expected_attach_type for the latter case, so that it can be
* validated later at attach time.
*
* bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
* prog type requires it but has some attach types that have to be backward
* compatible.
*/
static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
{
switch (attr->prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK:
/* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
* exist so checking for non-zero is the way to go here.
*/
if (!attr->expected_attach_type)
attr->expected_attach_type =
BPF_CGROUP_INET_SOCK_CREATE;
break;
}
}
static int
bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
enum bpf_attach_type expected_attach_type)
{
switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK:
switch (expected_attach_type) {
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
return 0;
default:
return -EINVAL;
}
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
switch (expected_attach_type) {
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
return 0;
default:
return -EINVAL;
}
default:
return 0;
}
}
static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
enum bpf_attach_type attach_type)
{
switch (prog->type) {
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
default:
return 0;
}
}
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_ifindex
#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type
static int bpf_prog_load(union bpf_attr *attr)
{
......@@ -1205,11 +1276,17 @@ static int bpf_prog_load(union bpf_attr *attr)
!capable(CAP_SYS_ADMIN))
return -EPERM;
bpf_prog_load_fixup_attach_type(attr);
if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
return -EINVAL;
/* plain bpf_prog allocation */
prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
if (!prog)
return -ENOMEM;
prog->expected_attach_type = attr->expected_attach_type;
prog->aux->offload_requested = !!attr->prog_ifindex;
err = security_bpf_prog_alloc(prog->aux);
......@@ -1311,6 +1388,81 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
struct bpf_raw_tracepoint {
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
};
static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
{
struct bpf_raw_tracepoint *raw_tp = filp->private_data;
if (raw_tp->prog) {
bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
bpf_prog_put(raw_tp->prog);
}
kfree(raw_tp);
return 0;
}
static const struct file_operations bpf_raw_tp_fops = {
.release = bpf_raw_tracepoint_release,
.read = bpf_dummy_read,
.write = bpf_dummy_write,
};
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
{
struct bpf_raw_tracepoint *raw_tp;
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
char tp_name[128];
int tp_fd, err;
if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
sizeof(tp_name) - 1) < 0)
return -EFAULT;
tp_name[sizeof(tp_name) - 1] = 0;
btp = bpf_find_raw_tracepoint(tp_name);
if (!btp)
return -ENOENT;
raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
if (!raw_tp)
return -ENOMEM;
raw_tp->btp = btp;
prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
BPF_PROG_TYPE_RAW_TRACEPOINT);
if (IS_ERR(prog)) {
err = PTR_ERR(prog);
goto out_free_tp;
}
err = bpf_probe_register(raw_tp->btp, prog);
if (err)
goto out_put_prog;
raw_tp->prog = prog;
tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
O_CLOEXEC);
if (tp_fd < 0) {
bpf_probe_unregister(raw_tp->btp, prog);
err = tp_fd;
goto out_put_prog;
}
return tp_fd;
out_put_prog:
bpf_prog_put(prog);
out_free_tp:
kfree(raw_tp);
return err;
}
#ifdef CONFIG_CGROUP_BPF
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
......@@ -1374,8 +1526,16 @@ static int bpf_prog_attach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_SKB;
break;
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
break;
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
......@@ -1395,6 +1555,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
if (IS_ERR(prog))
return PTR_ERR(prog);
if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
bpf_prog_put(prog);
return -EINVAL;
}
cgrp = cgroup_get_from_fd(attr->target_fd);
if (IS_ERR(cgrp)) {
bpf_prog_put(prog);
......@@ -1431,8 +1596,16 @@ static int bpf_prog_detach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_SKB;
break;
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
break;
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
......@@ -1482,6 +1655,12 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
break;
......@@ -1921,6 +2100,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_OBJ_GET_INFO_BY_FD:
err = bpf_obj_get_info_by_fd(&attr, uattr);
break;
case BPF_RAW_TRACEPOINT_OPEN:
err = bpf_raw_tracepoint_open(&attr);
break;
default:
err = -EINVAL;
break;
......
......@@ -168,23 +168,12 @@ struct bpf_call_arg_meta {
static DEFINE_MUTEX(bpf_verifier_lock);
/* log_level controls verbosity level of eBPF verifier.
* bpf_verifier_log_write() is used to dump the verification trace to the log,
* so the user can figure out what's wrong with the program
*/
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...)
void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
va_list args)
{
struct bpf_verifer_log *log = &env->log;
unsigned int n;
va_list args;
if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
return;
va_start(args, fmt);
n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
va_end(args);
WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
"verifier log line truncated - local buffer too short\n");
......@@ -197,14 +186,37 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
else
log->ubuf = NULL;
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
/* Historically bpf_verifier_log_write was called verbose, but the name was too
* generic for symbol export. The function was renamed, but not the calls in
* the verifier to avoid complicating backports. Hence the alias below.
/* log_level controls verbosity level of eBPF verifier.
* bpf_verifier_log_write() is used to dump the verification trace to the log,
* so the user can figure out what's wrong with the program
*/
static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
const char *fmt, ...)
__attribute__((alias("bpf_verifier_log_write")));
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...)
{
va_list args;
if (!bpf_verifier_log_needed(&env->log))
return;
va_start(args, fmt);
bpf_verifier_vlog(&env->log, fmt, args);
va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
struct bpf_verifier_env *env = private_data;
va_list args;
if (!bpf_verifier_log_needed(&env->log))
return;
va_start(args, fmt);
bpf_verifier_vlog(&env->log, fmt, args);
va_end(args);
}
static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
......@@ -1311,7 +1323,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
};
if (env->ops->is_valid_access &&
env->ops->is_valid_access(off, size, t, &info)) {
env->ops->is_valid_access(off, size, t, env->prog, &info)) {
/* A non zero info.ctx_field_size indicates that this field is a
* candidate for later verifier transformation to load the whole
* field and then apply a mask when accessed with a narrower
......@@ -2337,7 +2349,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
if (env->ops->get_func_proto)
fn = env->ops->get_func_proto(func_id);
fn = env->ops->get_func_proto(func_id, env->prog);
if (!fn) {
verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
func_id);
......@@ -3875,6 +3887,7 @@ static int check_return_code(struct bpf_verifier_env *env)
switch (env->prog->type) {
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
break;
......@@ -4600,10 +4613,11 @@ static int do_check(struct bpf_verifier_env *env)
if (env->log.level) {
const struct bpf_insn_cbs cbs = {
.cb_print = verbose,
.private_data = env,
};
verbose(env, "%d: ", insn_idx);
print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks);
print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
}
if (bpf_prog_is_dev_bound(env->prog->aux)) {
......@@ -5559,7 +5573,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn = new_prog->insnsi + i + delta;
}
patch_call_imm:
fn = env->ops->get_func_proto(insn->imm);
fn = env->ops->get_func_proto(insn->imm, env->prog);
/* all functions that have prototype and verifier allowed
* programs to call them, must be real in-kernel functions
*/
......@@ -5601,7 +5615,7 @@ static void free_states(struct bpf_verifier_env *env)
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
{
struct bpf_verifier_env *env;
struct bpf_verifer_log *log;
struct bpf_verifier_log *log;
int ret = -EINVAL;
/* no program is valid */
......
......@@ -524,7 +524,8 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
.arg3_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
......@@ -568,7 +569,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
}
}
static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
......@@ -582,12 +584,13 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_override_return_proto;
#endif
default:
return tracing_func_proto(func_id);
return tracing_func_proto(func_id, prog);
}
}
/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= sizeof(struct pt_regs))
......@@ -661,7 +664,8 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
.arg3_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
......@@ -669,11 +673,12 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_tp;
default:
return tracing_func_proto(func_id);
return tracing_func_proto(func_id, prog);
}
}
static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
......@@ -721,7 +726,8 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
.arg3_type = ARG_CONST_SIZE,
};
static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
......@@ -731,11 +737,94 @@ static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_perf_prog_read_value:
return &bpf_perf_prog_read_value_proto;
default:
return tracing_func_proto(func_id);
return tracing_func_proto(func_id, prog);
}
}
/*
* bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
* to avoid potential recursive reuse issue when/if tracepoints are added
* inside bpf_*_event_output and/or bpf_get_stack_id
*/
static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
perf_fetch_caller_regs(regs);
return ____bpf_perf_event_output(regs, map, flags, data, size);
}
static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
.func = bpf_perf_event_output_raw_tp,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
{
struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
perf_fetch_caller_regs(regs);
/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
flags, 0, 0);
}
static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
.func = bpf_get_stackid_raw_tp,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *
raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
return &bpf_perf_event_output_proto_raw_tp;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_raw_tp;
default:
return tracing_func_proto(func_id, prog);
}
}
static bool raw_tp_prog_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
/* largest tracepoint in the kernel has 12 args */
if (off < 0 || off >= sizeof(__u64) * 12)
return false;
if (type != BPF_READ)
return false;
if (off % size != 0)
return false;
return true;
}
const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
.get_func_proto = raw_tp_prog_func_proto,
.is_valid_access = raw_tp_prog_is_valid_access,
};
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
};
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_u64 = sizeof(u64);
......@@ -908,3 +997,106 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
return ret;
}
extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
{
struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
for (; btp < __stop__bpf_raw_tp; btp++) {
if (!strcmp(btp->tp->name, name))
return btp;
}
return NULL;
}
static __always_inline
void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
{
rcu_read_lock();
preempt_disable();
(void) BPF_PROG_RUN(prog, args);
preempt_enable();
rcu_read_unlock();
}
#define UNPACK(...) __VA_ARGS__
#define REPEAT_1(FN, DL, X, ...) FN(X)
#define REPEAT_2(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
#define REPEAT_3(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
#define REPEAT_4(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
#define REPEAT_5(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
#define REPEAT_6(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
#define REPEAT_7(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
#define REPEAT_8(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
#define REPEAT_9(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
#define REPEAT_10(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
#define REPEAT_11(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
#define REPEAT_12(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
#define REPEAT(X, FN, DL, ...) REPEAT_##X(FN, DL, __VA_ARGS__)
#define SARG(X) u64 arg##X
#define COPY(X) args[X] = arg##X
#define __DL_COM (,)
#define __DL_SEM (;)
#define __SEQ_0_11 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
#define BPF_TRACE_DEFN_x(x) \
void bpf_trace_run##x(struct bpf_prog *prog, \
REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \
{ \
u64 args[x]; \
REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \
__bpf_trace_run(prog, args); \
} \
EXPORT_SYMBOL_GPL(bpf_trace_run##x)
BPF_TRACE_DEFN_x(1);
BPF_TRACE_DEFN_x(2);
BPF_TRACE_DEFN_x(3);
BPF_TRACE_DEFN_x(4);
BPF_TRACE_DEFN_x(5);
BPF_TRACE_DEFN_x(6);
BPF_TRACE_DEFN_x(7);
BPF_TRACE_DEFN_x(8);
BPF_TRACE_DEFN_x(9);
BPF_TRACE_DEFN_x(10);
BPF_TRACE_DEFN_x(11);
BPF_TRACE_DEFN_x(12);
static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
struct tracepoint *tp = btp->tp;
/*
* check that program doesn't access arguments beyond what's
* available in this tracepoint
*/
if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
return -EINVAL;
return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
}
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
int err;
mutex_lock(&bpf_event_mutex);
err = __bpf_probe_register(btp, prog);
mutex_unlock(&bpf_event_mutex);
return err;
}
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
int err;
mutex_lock(&bpf_event_mutex);
err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
mutex_unlock(&bpf_event_mutex);
return err;
}
......@@ -132,14 +132,7 @@ EXPORT_SYMBOL(sg_last);
void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
memset(sgl, 0, sizeof(*sgl) * nents);
#ifdef CONFIG_DEBUG_SG
{
unsigned int i;
for (i = 0; i < nents; i++)
sgl[i].sg_magic = SG_MAGIC;
}
#endif
sg_mark_end(&sgl[nents - 1]);
sg_init_marker(sgl, nents);
}
EXPORT_SYMBOL(sg_init_table);
......
此差异已折叠。
......@@ -432,23 +432,37 @@ EXPORT_SYMBOL(inet_release);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
unsigned short snum;
int chk_addr_ret;
u32 tb_id = RT_TABLE_LOCAL;
int err;
/* If the socket has its own bind function then use it. (RAW) */
if (sk->sk_prot->bind) {
err = sk->sk_prot->bind(sk, uaddr, addr_len);
goto out;
return sk->sk_prot->bind(sk, uaddr, addr_len);
}
err = -EINVAL;
if (addr_len < sizeof(struct sockaddr_in))
goto out;
return -EINVAL;
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
if (err)
return err;
return __inet_bind(sk, uaddr, addr_len, false, true);
}
EXPORT_SYMBOL(inet_bind);
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
bool force_bind_address_no_port, bool with_lock)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
unsigned short snum;
int chk_addr_ret;
u32 tb_id = RT_TABLE_LOCAL;
int err;
if (addr->sin_family != AF_INET) {
/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
......@@ -492,7 +506,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* would be illegal to use them (multicast/broadcast) in
* which case the sending device address is used.
*/
lock_sock(sk);
if (with_lock)
lock_sock(sk);
/* Check these errors (active socket, double bind). */
err = -EINVAL;
......@@ -504,11 +519,18 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
if ((snum || !inet->bind_address_no_port) &&
sk->sk_prot->get_port(sk, snum)) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
err = -EADDRINUSE;
goto out_release_sock;
if (snum || !(inet->bind_address_no_port ||
force_bind_address_no_port)) {
if (sk->sk_prot->get_port(sk, snum)) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
err = -EADDRINUSE;
goto out_release_sock;
}
err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
if (err) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
goto out_release_sock;
}
}
if (inet->inet_rcv_saddr)
......@@ -521,22 +543,29 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
err = 0;
out_release_sock:
release_sock(sk);
if (with_lock)
release_sock(sk);
out:
return err;
}
EXPORT_SYMBOL(inet_bind);
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sock *sk = sock->sk;
int err;
if (addr_len < sizeof(uaddr->sa_family))
return -EINVAL;
if (uaddr->sa_family == AF_UNSPEC)
return sk->sk_prot->disconnect(sk, flags);
if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
if (err)
return err;
}
if (!inet_sk(sk)->inet_num && inet_autobind(sk))
return -EAGAIN;
return sk->sk_prot->connect(sk, uaddr, addr_len);
......@@ -617,6 +646,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (sk->sk_state != TCP_CLOSE)
goto out;
if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
if (err)
goto out;
}
err = sk->sk_prot->connect(sk, uaddr, addr_len);
if (err < 0)
goto out;
......
......@@ -485,6 +485,14 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
}
}
static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
int target, struct sock *sk)
{
return (tp->rcv_nxt - tp->copied_seq >= target) ||
(sk->sk_prot->stream_memory_read ?
sk->sk_prot->stream_memory_read(sk) : false);
}
/*
* Wait for a TCP event.
*
......@@ -554,7 +562,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
tp->urg_data)
target++;
if (tp->rcv_nxt - tp->copied_seq >= target)
if (tcp_stream_is_readable(tp, target, sk))
mask |= EPOLLIN | EPOLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
......
......@@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
/* This check is replicated from tcp_v4_connect() and intended to
* prevent BPF program called below from accessing bytes that are out
* of the bound specified by user in addr_len.
*/
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
sock_owned_by_me(sk);
return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
}
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
......@@ -2408,6 +2423,7 @@ struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close,
.pre_connect = tcp_v4_pre_connect,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
......
......@@ -1658,6 +1658,19 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
goto try_again;
}
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
/* This check is replicated from __ip4_datagram_connect() and
* intended to prevent BPF program called below from accessing bytes
* that are out of the bound specified by user in addr_len.
*/
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
}
EXPORT_SYMBOL(udp_pre_connect);
int __udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
......@@ -2530,6 +2543,7 @@ struct proto udp_prot = {
.name = "UDP",
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udp_pre_connect,
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
......
......@@ -277,15 +277,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
/* bind for INET6 API */
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
__be32 v4addr = 0;
unsigned short snum;
bool saved_ipv6only;
int addr_type = 0;
int err = 0;
/* If the socket has its own bind function then use it. */
......@@ -295,11 +287,35 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
if (err)
return err;
return __inet6_bind(sk, uaddr, addr_len, false, true);
}
EXPORT_SYMBOL(inet6_bind);
int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
bool force_bind_address_no_port, bool with_lock)
{
struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
__be32 v4addr = 0;
unsigned short snum;
bool saved_ipv6only;
int addr_type = 0;
int err = 0;
if (addr->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
addr_type = ipv6_addr_type(&addr->sin6_addr);
if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
return -EINVAL;
snum = ntohs(addr->sin6_port);
......@@ -307,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
lock_sock(sk);
if (with_lock)
lock_sock(sk);
/* Check these errors (active socket, double bind). */
if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
......@@ -395,12 +412,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */
if ((snum || !inet->bind_address_no_port) &&
sk->sk_prot->get_port(sk, snum)) {
sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
if (snum || !(inet->bind_address_no_port ||
force_bind_address_no_port)) {
if (sk->sk_prot->get_port(sk, snum)) {
sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
}
err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
if (err) {
sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
goto out;
}
}
if (addr_type != IPV6_ADDR_ANY)
......@@ -411,13 +436,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = 0;
inet->inet_daddr = 0;
out:
release_sock(sk);
if (with_lock)
release_sock(sk);
return err;
out_unlock:
rcu_read_unlock();
goto out;
}
EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
{
......@@ -868,6 +893,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
.nd_tbl = &nd_tbl,
};
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
.inet6_bind = __inet6_bind,
};
static int __init inet6_init(void)
{
struct list_head *r;
......@@ -1024,6 +1053,7 @@ static int __init inet6_init(void)
/* ensure that ipv6 stubs are visible only after ipv6 is ready */
wmb();
ipv6_stub = &ipv6_stub_impl;
ipv6_bpf_stub = &ipv6_bpf_stub_impl;
out:
return err;
......
......@@ -117,6 +117,21 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
ipv6_hdr(skb)->saddr.s6_addr32);
}
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
/* This check is replicated from tcp_v6_connect() and intended to
* prevent BPF program called below from accessing bytes that are out
* of the bound specified by user in addr_len.
*/
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
sock_owned_by_me(sk);
return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
......@@ -1925,6 +1940,7 @@ struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
.close = tcp_close,
.pre_connect = tcp_v6_pre_connect,
.connect = tcp_v6_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
......
......@@ -957,6 +957,25 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
}
}
static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
/* The following checks are replicated from __ip6_datagram_connect()
* and intended to prevent BPF program called below from accessing
* bytes that are out of the bound specified by user in addr_len.
*/
if (uaddr->sa_family == AF_INET) {
if (__ipv6_only_sock(sk))
return -EAFNOSUPPORT;
return udp_pre_connect(sk, uaddr, addr_len);
}
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
}
/**
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
......@@ -1512,6 +1531,7 @@ struct proto udpv6_prot = {
.name = "UDPv6",
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udpv6_pre_connect,
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
......
......@@ -33,7 +33,7 @@
/* Tracing for driver callbacks */
DECLARE_EVENT_CLASS(local_only_evt,
DECLARE_EVENT_CLASS(local_only_evt4,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local),
TP_STRUCT__entry(
......@@ -45,7 +45,7 @@ DECLARE_EVENT_CLASS(local_only_evt,
TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
);
DEFINE_EVENT(local_only_evt, 802154_drv_return_void,
DEFINE_EVENT(local_only_evt4, 802154_drv_return_void,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local)
);
......@@ -65,12 +65,12 @@ TRACE_EVENT(802154_drv_return_int,
__entry->ret)
);
DEFINE_EVENT(local_only_evt, 802154_drv_start,
DEFINE_EVENT(local_only_evt4, 802154_drv_start,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local)
);
DEFINE_EVENT(local_only_evt, 802154_drv_stop,
DEFINE_EVENT(local_only_evt4, 802154_drv_stop,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local)
);
......
......@@ -3184,7 +3184,7 @@ TRACE_EVENT(rdev_start_radar_detection,
TRACE_EVENT(rdev_set_mcast_rate,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
int mcast_rate[NUM_NL80211_BANDS]),
int *mcast_rate),
TP_ARGS(wiphy, netdev, mcast_rate),
TP_STRUCT__entry(
WIPHY_ENTRY
......
......@@ -119,6 +119,7 @@ always += offwaketime_kern.o
always += spintest_kern.o
always += map_perf_test_kern.o
always += test_overhead_tp_kern.o
always += test_overhead_raw_tp_kern.o
always += test_overhead_kprobe_kern.o
always += parse_varlen.o parse_simple.o parse_ldabs.o
always += test_cgrp2_tc_kern.o
......
......@@ -61,6 +61,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0;
bool is_xdp = strncmp(event, "xdp", 3) == 0;
bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
......@@ -85,6 +86,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_KPROBE;
} else if (is_tracepoint) {
prog_type = BPF_PROG_TYPE_TRACEPOINT;
} else if (is_raw_tracepoint) {
prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT;
} else if (is_xdp) {
prog_type = BPF_PROG_TYPE_XDP;
} else if (is_perf_event) {
......@@ -131,6 +134,16 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return populate_prog_array(event, fd);
}
if (is_raw_tracepoint) {
efd = bpf_raw_tracepoint_open(event + 15, fd);
if (efd < 0) {
printf("tracepoint %s %s\n", event + 15, strerror(errno));
return -1;
}
event_fd[prog_cnt - 1] = efd;
return 0;
}
if (is_kprobe || is_kretprobe) {
if (is_kprobe)
event += 7;
......@@ -587,6 +600,7 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
if (memcmp(shname, "kprobe/", 7) == 0 ||
memcmp(shname, "kretprobe/", 10) == 0 ||
memcmp(shname, "tracepoint/", 11) == 0 ||
memcmp(shname, "raw_tracepoint/", 15) == 0 ||
memcmp(shname, "xdp", 3) == 0 ||
memcmp(shname, "perf_event", 10) == 0 ||
memcmp(shname, "socket", 6) == 0 ||
......
......@@ -246,7 +246,7 @@ static void udp_client(void)
recv_len = recvfrom(s_rcv, &buf, sizeof(buf), 0,
(struct sockaddr *)&si_me, &slen);
if (recv_len < 0)
error(1, errno, "revieve\n");
error(1, errno, "receive\n");
res = memcmp(&(si_other.sin_addr), &(si_me.sin_addr),
sizeof(si_me.sin_addr));
if (res != 0)
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
SEC("raw_tracepoint/task_rename")
int prog(struct bpf_raw_tracepoint_args *ctx)
{
return 0;
}
SEC("raw_tracepoint/urandom_read")
int prog2(struct bpf_raw_tracepoint_args *ctx)
{
return 0;
}
char _license[] SEC("license") = "GPL";
......@@ -158,5 +158,17 @@ int main(int argc, char **argv)
unload_progs();
}
if (test_flags & 0xC0) {
snprintf(filename, sizeof(filename),
"%s_raw_tp_kern.o", argv[0]);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
}
printf("w/RAW_TRACEPOINT\n");
run_perf_test(num_cpu, test_flags >> 6);
unload_progs();
}
return 0;
}
......@@ -54,7 +54,7 @@ struct bpf_map_def SEC("maps") sock_map_redir = {
.type = BPF_MAP_TYPE_SOCKMAP,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1,
.max_entries = 20,
};
struct bpf_map_def SEC("maps") sock_apply_bytes = {
......@@ -78,6 +78,19 @@ struct bpf_map_def SEC("maps") sock_pull_bytes = {
.max_entries = 2
};
struct bpf_map_def SEC("maps") sock_redir_flags = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1
};
struct bpf_map_def SEC("maps") sock_skb_opts = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1
};
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
......@@ -90,15 +103,24 @@ int bpf_prog2(struct __sk_buff *skb)
{
__u32 lport = skb->local_port;
__u32 rport = skb->remote_port;
int ret = 0;
int len, *f, ret, zero = 0;
__u64 flags = 0;
if (lport == 10000)
ret = 10;
else
ret = 1;
bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
len = (__u32)skb->data_end - (__u32)skb->data;
f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
if (f && *f) {
ret = 3;
flags = *f;
}
bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
len, flags);
return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
}
SEC("sockops")
......@@ -197,8 +219,9 @@ int bpf_prog5(struct sk_msg_md *msg)
SEC("sk_msg3")
int bpf_prog6(struct sk_msg_md *msg)
{
int *bytes, zero = 0, one = 1;
int *start, *end;
int *bytes, zero = 0, one = 1, key = 0;
int *start, *end, *f;
__u64 flags = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
......@@ -210,15 +233,22 @@ int bpf_prog6(struct sk_msg_md *msg)
end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
if (start && end)
bpf_msg_pull_data(msg, *start, *end, 0);
return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
if (f && *f) {
key = 2;
flags = *f;
}
return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
}
SEC("sk_msg4")
int bpf_prog7(struct sk_msg_md *msg)
{
int err1 = 0, err2 = 0, zero = 0, one = 1;
int *bytes, *start, *end, len1, len2;
int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
int *f, *bytes, *start, *end, len1, len2;
__u64 flags = 0;
int err;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
err1 = bpf_msg_apply_bytes(msg, *bytes);
......@@ -229,7 +259,6 @@ int bpf_prog7(struct sk_msg_md *msg)
start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
if (start && end) {
int err;
bpf_printk("sk_msg2: pull(%i:%i)\n",
start ? *start : 0, end ? *end : 0);
......@@ -241,9 +270,16 @@ int bpf_prog7(struct sk_msg_md *msg)
bpf_printk("sk_msg2: length update %i->%i\n",
len1, len2);
}
bpf_printk("sk_msg3: redirect(%iB) err1=%i err2=%i\n",
len1, err1, err2);
return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
if (f && *f) {
key = 2;
flags = *f;
}
bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
len1, flags, err1 ? err1 : err2);
err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
bpf_printk("sk_msg3: err %i\n", err);
return err;
}
SEC("sk_msg5")
......
#Test a bunch of positive cases to verify basic functionality
for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
for prog in "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
for t in "sendmsg" "sendpage"; do
for r in 1 10 100; do
for i in 1 10 100; do
......@@ -100,6 +100,25 @@ for t in "sendmsg" "sendpage"; do
sleep 2
done
prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
# Test apply and redirect with larger value than send
r=1
i=8
......@@ -113,6 +132,25 @@ for t in "sendmsg" "sendpage"; do
sleep 2
done
prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
# Test apply and redirect with apply that never reaches limit
r=1024
i=1
......
......@@ -64,6 +64,8 @@ int txmsg_apply;
int txmsg_cork;
int txmsg_start;
int txmsg_end;
int txmsg_ingress;
int txmsg_skb;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
......@@ -83,6 +85,8 @@ static const struct option long_options[] = {
{"txmsg_cork", required_argument, NULL, 'k'},
{"txmsg_start", required_argument, NULL, 's'},
{"txmsg_end", required_argument, NULL, 'e'},
{"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
{"txmsg_skb", no_argument, &txmsg_skb, 1 },
{0, 0, NULL, 0 }
};
......@@ -793,6 +797,60 @@ int main(int argc, char **argv)
return err;
}
}
if (txmsg_ingress) {
int in = BPF_F_INGRESS;
i = 0;
err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
err, strerror(errno));
}
i = 1;
err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
err, strerror(errno));
}
err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
err, strerror(errno));
}
i = 2;
err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
err, strerror(errno));
}
}
if (txmsg_skb) {
int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
int ingress = BPF_F_INGRESS;
i = 0;
err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
err, strerror(errno));
}
i = 3;
err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
err, strerror(errno));
}
}
}
if (txmsg_drop)
......
......@@ -43,15 +43,10 @@ struct aa_buffers {
DECLARE_PER_CPU(struct aa_buffers, aa_buffers);
#define COUNT_ARGS(X...) COUNT_ARGS_HELPER(, ##X, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#define COUNT_ARGS_HELPER(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, X...) n
#define CONCAT(X, Y) X ## Y
#define CONCAT_AFTER(X, Y) CONCAT(X, Y)
#define ASSIGN(FN, X, N) ((X) = FN(N))
#define EVAL1(FN, X) ASSIGN(FN, X, 0) /*X = FN(0)*/
#define EVAL2(FN, X, Y...) do { ASSIGN(FN, X, 1); EVAL1(FN, Y); } while (0)
#define EVAL(FN, X...) CONCAT_AFTER(EVAL, COUNT_ARGS(X))(FN, X)
#define EVAL(FN, X...) CONCATENATE(EVAL, COUNT_ARGS(X))(FN, X)
#define for_each_cpu_buffer(I) for ((I) = 0; (I) < MAX_PATH_BUFFERS; (I)++)
......
......@@ -14,7 +14,7 @@
#include <linux/tracepoint.h>
TRACE_EVENT(in_packet,
TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 cip_header[2], unsigned int payload_length, unsigned int index),
TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 *cip_header, unsigned int payload_length, unsigned int index),
TP_ARGS(s, cycles, cip_header, payload_length, index),
TP_STRUCT__entry(
__field(unsigned int, second)
......
......@@ -114,7 +114,7 @@ static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
}
static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
static void print_insn(void *private_data, const char *fmt, ...)
{
va_list args;
......@@ -124,7 +124,7 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
}
static void
print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...)
print_insn_for_graph(void *private_data, const char *fmt, ...)
{
char buf[64], *p;
va_list args;
......@@ -154,7 +154,7 @@ print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...)
printf("%s", buf);
}
static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
static void print_insn_json(void *private_data, const char *fmt, ...)
{
unsigned int l = strlen(fmt);
char chomped_fmt[l];
......@@ -248,7 +248,7 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
jsonw_start_object(json_wtr);
jsonw_name(json_wtr, "disasm");
print_bpf_insn(&cbs, NULL, insn + i, true);
print_bpf_insn(&cbs, insn + i, true);
if (opcodes) {
jsonw_name(json_wtr, "opcodes");
......@@ -302,7 +302,7 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
printf("% 4d: ", i);
print_bpf_insn(&cbs, NULL, insn + i, true);
print_bpf_insn(&cbs, insn + i, true);
if (opcodes) {
printf(" ");
......@@ -331,7 +331,7 @@ void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
for (; cur <= insn_end; cur++) {
printf("% 4d: ", (int)(cur - insn_start + start_idx));
print_bpf_insn(&cbs, NULL, cur, true);
print_bpf_insn(&cbs, cur, true);
if (cur != insn_end)
printf(" | ");
}
......
此差异已折叠。
......@@ -146,26 +146,30 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
-1);
}
int bpf_load_program_name(enum bpf_prog_type type, const char *name,
const struct bpf_insn *insns,
size_t insns_cnt, const char *license,
__u32 kern_version, char *log_buf,
size_t log_buf_sz)
int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
char *log_buf, size_t log_buf_sz)
{
int fd;
union bpf_attr attr;
__u32 name_len = name ? strlen(name) : 0;
__u32 name_len;
int fd;
if (!load_attr)
return -EINVAL;
name_len = load_attr->name ? strlen(load_attr->name) : 0;
bzero(&attr, sizeof(attr));
attr.prog_type = type;
attr.insn_cnt = (__u32)insns_cnt;
attr.insns = ptr_to_u64(insns);
attr.license = ptr_to_u64(license);
attr.prog_type = load_attr->prog_type;
attr.expected_attach_type = load_attr->expected_attach_type;
attr.insn_cnt = (__u32)load_attr->insns_cnt;
attr.insns = ptr_to_u64(load_attr->insns);
attr.license = ptr_to_u64(load_attr->license);
attr.log_buf = ptr_to_u64(NULL);
attr.log_size = 0;
attr.log_level = 0;
attr.kern_version = kern_version;
memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
attr.kern_version = load_attr->kern_version;
memcpy(attr.prog_name, load_attr->name,
min(name_len, BPF_OBJ_NAME_LEN - 1));
fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
if (fd >= 0 || !log_buf || !log_buf_sz)
......@@ -184,8 +188,18 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
__u32 kern_version, char *log_buf,
size_t log_buf_sz)
{
return bpf_load_program_name(type, NULL, insns, insns_cnt, license,
kern_version, log_buf, log_buf_sz);
struct bpf_load_program_attr load_attr;
memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
load_attr.prog_type = type;
load_attr.expected_attach_type = 0;
load_attr.name = NULL;
load_attr.insns = insns;
load_attr.insns_cnt = insns_cnt;
load_attr.license = license;
load_attr.kern_version = kern_version;
return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
}
int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
......@@ -428,6 +442,17 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
return err;
}
int bpf_raw_tracepoint_open(const char *name, int prog_fd)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
attr.raw_tracepoint.name = ptr_to_u64(name);
attr.raw_tracepoint.prog_fd = prog_fd;
return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}
int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
{
struct sockaddr_nl sa;
......
此差异已折叠。
此差异已折叠。
......@@ -248,6 +248,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path);
long libbpf_get_error(const void *ptr);
struct bpf_prog_load_attr {
const char *file;
enum bpf_prog_type prog_type;
enum bpf_attach_type expected_attach_type;
};
int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
struct bpf_object **pobj, int *prog_fd);
int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd);
......
......@@ -94,6 +94,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
(void *) BPF_FUNC_msg_cork_bytes;
static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
(void *) BPF_FUNC_msg_pull_data;
static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
(void *) BPF_FUNC_bind;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册