提交 4e10df9a 编写于 作者: A Alexei Starovoitov 提交者: David S. Miller

bpf: introduce bpf_skb_vlan_push/pop() helpers

Allow eBPF programs attached to TC qdiscs call skb_vlan_push/pop via
helper functions. These functions may change skb->data/hlen which are
cached by some JITs to improve performance of ld_abs/ld_ind instructions.
Therefore JITs need to recognize bpf_skb_vlan_push/pop() calls,
re-compute header len and re-cache skb->data/hlen back into cpu registers.
Note, skb->data/hlen are not directly accessible from the programs,
so any changes to skb->data done either by these helpers or by other
TC actions are safe.

eBPF JIT supported by three architectures:
- arm64 JIT is using bpf_load_pointer() without caching, so it's ok as-is.
- x64 JIT re-caches skb->data/hlen unconditionally after vlan_push/pop calls
  (experiments showed that conditional re-caching is slower).
- s390 JIT falls back to interpreter for now when bpf_skb_vlan_push() is present
  in the program (re-caching is tbd).

These helpers allow more scalable handling of vlan from the programs.
Instead of creating thousands of vlan netdevs on top of eth0 and attaching
TC+ingress+bpf to all of them, the program can be attached to eth0 directly
and manipulate vlans as necessary.
Signed-off-by: NAlexei Starovoitov <ast@plumgrid.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 f3120acc
...@@ -973,6 +973,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i ...@@ -973,6 +973,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
*/ */
const u64 func = (u64)__bpf_call_base + imm; const u64 func = (u64)__bpf_call_base + imm;
if (bpf_helper_changes_skb_data((void *)func))
/* TODO reload skb->data, hlen */
return -1;
REG_SET_SEEN(BPF_REG_5); REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC; jit->seen |= SEEN_FUNC;
/* lg %w1,<d(imm)>(%l) */ /* lg %w1,<d(imm)>(%l) */
......
...@@ -315,6 +315,26 @@ static void emit_bpf_tail_call(u8 **pprog) ...@@ -315,6 +315,26 @@ static void emit_bpf_tail_call(u8 **pprog)
*pprog = prog; *pprog = prog;
} }
static void emit_load_skb_data_hlen(u8 **pprog)
{
u8 *prog = *pprog;
int cnt = 0;
/* r9d = skb->len - skb->data_len (headlen)
* r10 = skb->data
*/
/* mov %r9d, off32(%rdi) */
EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));
/* sub %r9d, off32(%rdi) */
EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));
/* mov %r10, off32(%rdi) */
EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
*pprog = prog;
}
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx) int oldproglen, struct jit_context *ctx)
{ {
...@@ -329,36 +349,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -329,36 +349,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
emit_prologue(&prog); emit_prologue(&prog);
if (seen_ld_abs) { if (seen_ld_abs)
/* r9d : skb->len - skb->data_len (headlen) emit_load_skb_data_hlen(&prog);
* r10 : skb->data
*/
if (is_imm8(offsetof(struct sk_buff, len)))
/* mov %r9d, off8(%rdi) */
EMIT4(0x44, 0x8b, 0x4f,
offsetof(struct sk_buff, len));
else
/* mov %r9d, off32(%rdi) */
EMIT3_off32(0x44, 0x8b, 0x8f,
offsetof(struct sk_buff, len));
if (is_imm8(offsetof(struct sk_buff, data_len)))
/* sub %r9d, off8(%rdi) */
EMIT4(0x44, 0x2b, 0x4f,
offsetof(struct sk_buff, data_len));
else
EMIT3_off32(0x44, 0x2b, 0x8f,
offsetof(struct sk_buff, data_len));
if (is_imm8(offsetof(struct sk_buff, data)))
/* mov %r10, off8(%rdi) */
EMIT4(0x4c, 0x8b, 0x57,
offsetof(struct sk_buff, data));
else
/* mov %r10, off32(%rdi) */
EMIT3_off32(0x4c, 0x8b, 0x97,
offsetof(struct sk_buff, data));
}
for (i = 0; i < insn_cnt; i++, insn++) { for (i = 0; i < insn_cnt; i++, insn++) {
const s32 imm32 = insn->imm; const s32 imm32 = insn->imm;
...@@ -367,6 +359,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -367,6 +359,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
u8 b1 = 0, b2 = 0, b3 = 0; u8 b1 = 0, b2 = 0, b3 = 0;
s64 jmp_offset; s64 jmp_offset;
u8 jmp_cond; u8 jmp_cond;
bool reload_skb_data;
int ilen; int ilen;
u8 *func; u8 *func;
...@@ -818,12 +811,18 @@ xadd: if (is_imm8(insn->off)) ...@@ -818,12 +811,18 @@ xadd: if (is_imm8(insn->off))
func = (u8 *) __bpf_call_base + imm32; func = (u8 *) __bpf_call_base + imm32;
jmp_offset = func - (image + addrs[i]); jmp_offset = func - (image + addrs[i]);
if (seen_ld_abs) { if (seen_ld_abs) {
EMIT2(0x41, 0x52); /* push %r10 */ reload_skb_data = bpf_helper_changes_skb_data(func);
EMIT2(0x41, 0x51); /* push %r9 */ if (reload_skb_data) {
/* need to adjust jmp offset, since EMIT1(0x57); /* push %rdi */
* pop %r9, pop %r10 take 4 bytes after call insn jmp_offset += 22; /* pop, mov, sub, mov */
*/ } else {
jmp_offset += 4; EMIT2(0x41, 0x52); /* push %r10 */
EMIT2(0x41, 0x51); /* push %r9 */
/* need to adjust jmp offset, since
* pop %r9, pop %r10 take 4 bytes after call insn
*/
jmp_offset += 4;
}
} }
if (!imm32 || !is_simm32(jmp_offset)) { if (!imm32 || !is_simm32(jmp_offset)) {
pr_err("unsupported bpf func %d addr %p image %p\n", pr_err("unsupported bpf func %d addr %p image %p\n",
...@@ -832,8 +831,13 @@ xadd: if (is_imm8(insn->off)) ...@@ -832,8 +831,13 @@ xadd: if (is_imm8(insn->off))
} }
EMIT1_off32(0xE8, jmp_offset); EMIT1_off32(0xE8, jmp_offset);
if (seen_ld_abs) { if (seen_ld_abs) {
EMIT2(0x41, 0x59); /* pop %r9 */ if (reload_skb_data) {
EMIT2(0x41, 0x5A); /* pop %r10 */ EMIT1(0x5F); /* pop %rdi */
emit_load_skb_data_hlen(&prog);
} else {
EMIT2(0x41, 0x59); /* pop %r9 */
EMIT2(0x41, 0x5A); /* pop %r10 */
}
} }
break; break;
......
...@@ -192,5 +192,7 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto; ...@@ -192,5 +192,7 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
#endif /* _LINUX_BPF_H */ #endif /* _LINUX_BPF_H */
...@@ -411,6 +411,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); ...@@ -411,6 +411,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
void bpf_int_jit_compile(struct bpf_prog *fp); void bpf_int_jit_compile(struct bpf_prog *fp);
bool bpf_helper_changes_skb_data(void *func);
#ifdef CONFIG_BPF_JIT #ifdef CONFIG_BPF_JIT
typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
......
...@@ -256,6 +256,8 @@ enum bpf_func_id { ...@@ -256,6 +256,8 @@ enum bpf_func_id {
* Return: classid if != 0 * Return: classid if != 0
*/ */
BPF_FUNC_get_cgroup_classid, BPF_FUNC_get_cgroup_classid,
BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */
BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */
__BPF_FUNC_MAX_ID, __BPF_FUNC_MAX_ID,
}; };
......
...@@ -1437,6 +1437,50 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { ...@@ -1437,6 +1437,50 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
.arg1_type = ARG_PTR_TO_CTX, .arg1_type = ARG_PTR_TO_CTX,
}; };
static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
__be16 vlan_proto = (__force __be16) r2;
if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
vlan_proto != htons(ETH_P_8021AD)))
vlan_proto = htons(ETH_P_8021Q);
return skb_vlan_push(skb, vlan_proto, vlan_tci);
}
const struct bpf_func_proto bpf_skb_vlan_push_proto = {
.func = bpf_skb_vlan_push,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
};
static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
return skb_vlan_pop(skb);
}
const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
.func = bpf_skb_vlan_pop,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
bool bpf_helper_changes_skb_data(void *func)
{
if (func == bpf_skb_vlan_push)
return true;
if (func == bpf_skb_vlan_pop)
return true;
return false;
}
static const struct bpf_func_proto * static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id) sk_filter_func_proto(enum bpf_func_id func_id)
{ {
...@@ -1476,6 +1520,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) ...@@ -1476,6 +1520,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_clone_redirect_proto; return &bpf_clone_redirect_proto;
case BPF_FUNC_get_cgroup_classid: case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_proto; return &bpf_get_cgroup_classid_proto;
case BPF_FUNC_skb_vlan_push:
return &bpf_skb_vlan_push_proto;
case BPF_FUNC_skb_vlan_pop:
return &bpf_skb_vlan_pop_proto;
default: default:
return sk_filter_func_proto(func_id); return sk_filter_func_proto(func_id);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册