提交 1379ef82 编写于 作者: D Daniel Borkmann

Merge branch 'bpf-sockmap-ingress'

John Fastabend says:

====================
This series adds BPF_F_INGRESS flag support to the redirect APIs,
bringing the sockmap API in line with the cls_bpf redirect APIs.

We add it to both variants of sockmap programs, the first patch adds
support for tx ulp hooks and the third patch adds support for the recv
skb hooks. Patches two and four add tests for the corresponding
ingress redirect hooks.

Follow-on patches can address busy polling support, but the next series
from me will move the sockmap sample program into selftests.

v2: added static to function definition caught by kbuild bot
v3: fixed an error branch with a missing mem_uncharge;
    in the recvmsg op, moved the receive_queue check outside of the RCU region
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
...@@ -521,6 +521,8 @@ struct sk_msg_buff { ...@@ -521,6 +521,8 @@ struct sk_msg_buff {
__u32 key; __u32 key;
__u32 flags; __u32 flags;
struct bpf_map *map; struct bpf_map *map;
struct sk_buff *skb;
struct list_head list;
}; };
/* Compute the linear packet data range [data, data_end) which /* Compute the linear packet data range [data, data_end) which
......
...@@ -1085,6 +1085,7 @@ struct proto { ...@@ -1085,6 +1085,7 @@ struct proto {
#endif #endif
bool (*stream_memory_free)(const struct sock *sk); bool (*stream_memory_free)(const struct sock *sk);
bool (*stream_memory_read)(const struct sock *sk);
/* Memory pressure */ /* Memory pressure */
void (*enter_memory_pressure)(struct sock *sk); void (*enter_memory_pressure)(struct sock *sk);
void (*leave_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk);
......
...@@ -41,6 +41,8 @@ ...@@ -41,6 +41,8 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <net/strparser.h> #include <net/strparser.h>
#include <net/tcp.h> #include <net/tcp.h>
#include <linux/ptr_ring.h>
#include <net/inet_common.h>
#define SOCK_CREATE_FLAG_MASK \ #define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
...@@ -82,6 +84,7 @@ struct smap_psock { ...@@ -82,6 +84,7 @@ struct smap_psock {
int sg_size; int sg_size;
int eval; int eval;
struct sk_msg_buff *cork; struct sk_msg_buff *cork;
struct list_head ingress;
struct strparser strp; struct strparser strp;
struct bpf_prog *bpf_tx_msg; struct bpf_prog *bpf_tx_msg;
...@@ -103,6 +106,8 @@ struct smap_psock { ...@@ -103,6 +106,8 @@ struct smap_psock {
}; };
static void smap_release_sock(struct smap_psock *psock, struct sock *sock); static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
static int bpf_tcp_sendpage(struct sock *sk, struct page *page, static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags); int offset, size_t size, int flags);
...@@ -112,6 +117,21 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk) ...@@ -112,6 +117,21 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
return rcu_dereference_sk_user_data(sk); return rcu_dereference_sk_user_data(sk);
} }
static bool bpf_tcp_stream_read(const struct sock *sk)
{
struct smap_psock *psock;
bool empty = true;
rcu_read_lock();
psock = smap_psock_sk(sk);
if (unlikely(!psock))
goto out;
empty = list_empty(&psock->ingress);
out:
rcu_read_unlock();
return !empty;
}
static struct proto tcp_bpf_proto; static struct proto tcp_bpf_proto;
static int bpf_tcp_init(struct sock *sk) static int bpf_tcp_init(struct sock *sk)
{ {
...@@ -135,6 +155,8 @@ static int bpf_tcp_init(struct sock *sk) ...@@ -135,6 +155,8 @@ static int bpf_tcp_init(struct sock *sk)
if (psock->bpf_tx_msg) { if (psock->bpf_tx_msg) {
tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg; tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
tcp_bpf_proto.sendpage = bpf_tcp_sendpage; tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
} }
sk->sk_prot = &tcp_bpf_proto; sk->sk_prot = &tcp_bpf_proto;
...@@ -170,6 +192,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout) ...@@ -170,6 +192,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
{ {
void (*close_fun)(struct sock *sk, long timeout); void (*close_fun)(struct sock *sk, long timeout);
struct smap_psock_map_entry *e, *tmp; struct smap_psock_map_entry *e, *tmp;
struct sk_msg_buff *md, *mtmp;
struct smap_psock *psock; struct smap_psock *psock;
struct sock *osk; struct sock *osk;
...@@ -188,6 +211,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout) ...@@ -188,6 +211,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
close_fun = psock->save_close; close_fun = psock->save_close;
write_lock_bh(&sk->sk_callback_lock); write_lock_bh(&sk->sk_callback_lock);
list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
list_del(&md->list);
free_start_sg(psock->sock, md);
kfree(md);
}
list_for_each_entry_safe(e, tmp, &psock->maps, list) { list_for_each_entry_safe(e, tmp, &psock->maps, list) {
osk = cmpxchg(e->entry, sk, NULL); osk = cmpxchg(e->entry, sk, NULL);
if (osk == sk) { if (osk == sk) {
...@@ -468,6 +497,72 @@ static unsigned int smap_do_tx_msg(struct sock *sk, ...@@ -468,6 +497,72 @@ static unsigned int smap_do_tx_msg(struct sock *sk,
return _rc; return _rc;
} }
/*
 * Move up to @apply_bytes of scatterlist data from @md onto the ingress
 * list of @psock so that a later recvmsg on @sk can read it.
 *
 * @sk:          socket whose memory accounting is charged and whose
 *               sk_data_ready() is invoked once data is queued
 * @apply_bytes: byte limit for this redirect; 0 means "no limit", i.e.
 *               walk every entry from md->sg_start up to md->sg_end
 * @psock:       psock that owns the ingress list the new buffer joins
 * @md:          source message; consumed entries have their length/offset
 *               advanced and md->sg_start is moved past them
 * @flags:       unused here
 *
 * Returns 0 on success (including a partial move when memory ran out after
 * at least one entry was taken) or -ENOMEM if the buffer could not be
 * allocated or nothing at all could be charged to @sk.
 */
static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
			   struct smap_psock *psock,
			   struct sk_msg_buff *md, int flags)
{
	bool apply = apply_bytes;
	size_t size, copied = 0;
	struct sk_msg_buff *r;
	int err = 0, i;

	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_KERNEL);
	if (unlikely(!r))
		return -ENOMEM;

	lock_sock(sk);
	r->sg_start = md->sg_start;
	i = md->sg_start;

	do {
		size = (apply && apply_bytes < md->sg_data[i].length) ?
			apply_bytes : md->sg_data[i].length;

		if (!sk_wmem_schedule(sk, size)) {
			if (!copied)
				err = -ENOMEM;
			break;
		}

		sk_mem_charge(sk, size);
		/*
		 * Only populate the redirect entry once the charge has
		 * succeeded. Doing it before the check left r pointing at a
		 * page still owned by md on the -ENOMEM path, so the error
		 * cleanup below would release a page (and uncharge bytes)
		 * that md's owner releases again later.
		 */
		r->sg_data[i] = md->sg_data[i];
		r->sg_data[i].length = size;
		md->sg_data[i].length -= size;
		md->sg_data[i].offset += size;
		copied += size;

		if (md->sg_data[i].length) {
			/* Entry is split: both r and md now reference the page. */
			get_page(sg_page(&r->sg_data[i]));
			r->sg_end = (i + 1) == MAX_SKB_FRAGS ? 0 : i + 1;
		} else {
			/* Entry fully consumed: advance, wrapping the ring. */
			i++;
			if (i == MAX_SKB_FRAGS)
				i = 0;
			r->sg_end = i;
		}

		if (apply) {
			apply_bytes -= size;
			if (!apply_bytes)
				break;
		}
	} while (i != md->sg_end);

	md->sg_start = i;

	if (!err) {
		list_add_tail(&r->list, &psock->ingress);
		sk->sk_data_ready(sk);
	} else {
		free_start_sg(sk, r);
		kfree(r);
	}

	release_sock(sk);
	return err;
}
static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send, static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
struct sk_msg_buff *md, struct sk_msg_buff *md,
int flags) int flags)
...@@ -475,6 +570,7 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send, ...@@ -475,6 +570,7 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
struct smap_psock *psock; struct smap_psock *psock;
struct scatterlist *sg; struct scatterlist *sg;
int i, err, free = 0; int i, err, free = 0;
bool ingress = !!(md->flags & BPF_F_INGRESS);
sg = md->sg_data; sg = md->sg_data;
...@@ -487,9 +583,14 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send, ...@@ -487,9 +583,14 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
goto out_rcu; goto out_rcu;
rcu_read_unlock(); rcu_read_unlock();
lock_sock(sk);
err = bpf_tcp_push(sk, send, md, flags, false); if (ingress) {
release_sock(sk); err = bpf_tcp_ingress(sk, send, psock, md, flags);
} else {
lock_sock(sk);
err = bpf_tcp_push(sk, send, md, flags, false);
release_sock(sk);
}
smap_release_sock(psock, sk); smap_release_sock(psock, sk);
if (unlikely(err)) if (unlikely(err))
goto out; goto out;
...@@ -623,6 +724,92 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock, ...@@ -623,6 +724,92 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock,
return err; return err;
} }
/*
 * recvmsg handler that drains the psock ingress list of @sk into @msg.
 *
 * Falls back to tcp_recvmsg() when sk_receive_queue is non-empty, when no
 * psock is attached, or when the psock reference cannot be taken.
 * MSG_ERRQUEUE requests are handed to inet_recv_error().
 *
 * Returns the number of bytes copied from the ingress list (0 if the list
 * is empty), -EFAULT if copying into the iterator comes up short, or the
 * result of the fallback handler.
 */
static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			   int nonblock, int flags, int *addr_len)
{
	struct iov_iter *iter = &msg->msg_iter;
	struct smap_psock *psock;
	int copied = 0;

	if (unlikely(flags & MSG_ERRQUEUE))
		return inet_recv_error(sk, msg, len, addr_len);

	/*
	 * Decide on the plain TCP fallback before taking a psock reference.
	 * Checking after refcount_inc_not_zero() and returning straight into
	 * tcp_recvmsg() left that reference without a matching
	 * smap_release_sock().
	 */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);

	rcu_read_lock();
	psock = smap_psock_sk(sk);
	if (unlikely(!psock))
		goto out;

	if (unlikely(!refcount_inc_not_zero(&psock->refcnt)))
		goto out;
	rcu_read_unlock();

	lock_sock(sk);
	while (copied != len) {
		struct scatterlist *sg;
		struct sk_msg_buff *md;
		int i;

		md = list_first_entry_or_null(&psock->ingress,
					      struct sk_msg_buff, list);
		if (unlikely(!md))
			break;
		i = md->sg_start;
		do {
			struct page *page;
			int n, copy;

			sg = &md->sg_data[i];
			copy = sg->length;
			page = sg_page(sg);

			/* Clamp to the space left in the caller's buffer. */
			if (copied + copy > len)
				copy = len - copied;

			n = copy_page_to_iter(page, sg->offset, copy, iter);
			if (n != copy) {
				/* Remember where to resume, then bail out. */
				md->sg_start = i;
				release_sock(sk);
				smap_release_sock(psock, sk);
				return -EFAULT;
			}

			copied += copy;
			sg->offset += copy;
			sg->length -= copy;
			sk_mem_uncharge(sk, copy);

			if (!sg->length) {
				i++;
				if (i == MAX_SKB_FRAGS)
					i = 0;
				/*
				 * Pages of skb-backed messages are released
				 * with the skb via consume_skb() below.
				 */
				if (!md->skb)
					put_page(page);
			}
			if (copied == len)
				break;
		} while (i != md->sg_end);
		md->sg_start = i;

		/* Message fully consumed: unlink and free it. */
		if (!sg->length && md->sg_start == md->sg_end) {
			list_del(&md->list);
			if (md->skb)
				consume_skb(md->skb);
			kfree(md);
		}
	}

	release_sock(sk);
	smap_release_sock(psock, sk);
	return copied;
out:
	rcu_read_unlock();
	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
}
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{ {
int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS; int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
...@@ -861,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) ...@@ -861,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
__SK_DROP; __SK_DROP;
} }
/*
 * Queue @skb on the ingress list of @psock as a scatterlist-backed message
 * and signal the socket that data is ready.
 *
 * Returns skb->len on success, -EAGAIN if the message buffer cannot be
 * allocated or receive memory cannot be scheduled, or the negative value
 * returned by skb_to_sgvec().
 */
static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
{
	struct sock *sk = psock->sock;
	struct sk_msg_buff *r;
	int nents, ret;

	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
	if (unlikely(!r))
		return -EAGAIN;

	ret = -EAGAIN;
	if (!sk_rmem_schedule(sk, skb, skb->len))
		goto free_buf;

	sg_init_table(r->sg_data, MAX_SKB_FRAGS);
	nents = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
	ret = nents;
	if (unlikely(nents < 0))
		goto free_buf;

	sk_mem_charge(sk, skb->len);
	ret = skb->len;

	/* sg_end wraps to 0 when every slot of the ring is in use. */
	r->sg_start = 0;
	r->sg_end = (nents == MAX_SKB_FRAGS) ? 0 : nents;
	r->skb = skb;

	list_add_tail(&r->list, &psock->ingress);
	sk->sk_data_ready(sk);
	return ret;

free_buf:
	kfree(r);
	return ret;
}
static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
{ {
struct smap_psock *peer;
struct sock *sk; struct sock *sk;
__u32 in;
int rc; int rc;
rc = smap_verdict_func(psock, skb); rc = smap_verdict_func(psock, skb);
switch (rc) { switch (rc) {
case __SK_REDIRECT: case __SK_REDIRECT:
sk = do_sk_redirect_map(skb); sk = do_sk_redirect_map(skb);
if (likely(sk)) { if (!sk) {
struct smap_psock *peer = smap_psock_sk(sk); kfree_skb(skb);
break;
if (likely(peer && }
test_bit(SMAP_TX_RUNNING, &peer->state) &&
!sock_flag(sk, SOCK_DEAD) && peer = smap_psock_sk(sk);
sock_writeable(sk))) { in = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
skb_set_owner_w(skb, sk);
skb_queue_tail(&peer->rxqueue, skb); if (unlikely(!peer || sock_flag(sk, SOCK_DEAD) ||
schedule_work(&peer->tx_work); !test_bit(SMAP_TX_RUNNING, &peer->state))) {
break; kfree_skb(skb);
} break;
}
if (!in && sock_writeable(sk)) {
skb_set_owner_w(skb, sk);
skb_queue_tail(&peer->rxqueue, skb);
schedule_work(&peer->tx_work);
break;
} else if (in &&
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
skb_queue_tail(&peer->rxqueue, skb);
schedule_work(&peer->tx_work);
break;
} }
/* Fall through and free skb otherwise */ /* Fall through and free skb otherwise */
case __SK_DROP: case __SK_DROP:
...@@ -943,15 +1175,23 @@ static void smap_tx_work(struct work_struct *w) ...@@ -943,15 +1175,23 @@ static void smap_tx_work(struct work_struct *w)
} }
while ((skb = skb_dequeue(&psock->rxqueue))) { while ((skb = skb_dequeue(&psock->rxqueue))) {
__u32 flags;
rem = skb->len; rem = skb->len;
off = 0; off = 0;
start: start:
flags = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
do { do {
if (likely(psock->sock->sk_socket)) if (likely(psock->sock->sk_socket)) {
n = skb_send_sock_locked(psock->sock, if (flags)
skb, off, rem); n = smap_do_ingress(psock, skb);
else else
n = skb_send_sock_locked(psock->sock,
skb, off, rem);
} else {
n = -EINVAL; n = -EINVAL;
}
if (n <= 0) { if (n <= 0) {
if (n == -EAGAIN) { if (n == -EAGAIN) {
/* Retry when space is available */ /* Retry when space is available */
...@@ -969,7 +1209,9 @@ static void smap_tx_work(struct work_struct *w) ...@@ -969,7 +1209,9 @@ static void smap_tx_work(struct work_struct *w)
rem -= n; rem -= n;
off += n; off += n;
} while (rem); } while (rem);
kfree_skb(skb);
if (!flags)
kfree_skb(skb);
} }
out: out:
release_sock(psock->sock); release_sock(psock->sock);
...@@ -1107,6 +1349,7 @@ static void sock_map_remove_complete(struct bpf_stab *stab) ...@@ -1107,6 +1349,7 @@ static void sock_map_remove_complete(struct bpf_stab *stab)
static void smap_gc_work(struct work_struct *w) static void smap_gc_work(struct work_struct *w)
{ {
struct smap_psock_map_entry *e, *tmp; struct smap_psock_map_entry *e, *tmp;
struct sk_msg_buff *md, *mtmp;
struct smap_psock *psock; struct smap_psock *psock;
psock = container_of(w, struct smap_psock, gc_work); psock = container_of(w, struct smap_psock, gc_work);
...@@ -1131,6 +1374,12 @@ static void smap_gc_work(struct work_struct *w) ...@@ -1131,6 +1374,12 @@ static void smap_gc_work(struct work_struct *w)
kfree(psock->cork); kfree(psock->cork);
} }
list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
list_del(&md->list);
free_start_sg(psock->sock, md);
kfree(md);
}
list_for_each_entry_safe(e, tmp, &psock->maps, list) { list_for_each_entry_safe(e, tmp, &psock->maps, list) {
list_del(&e->list); list_del(&e->list);
kfree(e); kfree(e);
...@@ -1160,6 +1409,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock, ...@@ -1160,6 +1409,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
INIT_WORK(&psock->tx_work, smap_tx_work); INIT_WORK(&psock->tx_work, smap_tx_work);
INIT_WORK(&psock->gc_work, smap_gc_work); INIT_WORK(&psock->gc_work, smap_gc_work);
INIT_LIST_HEAD(&psock->maps); INIT_LIST_HEAD(&psock->maps);
INIT_LIST_HEAD(&psock->ingress);
refcount_set(&psock->refcnt, 1); refcount_set(&psock->refcnt, 1);
rcu_assign_sk_user_data(sock, psock); rcu_assign_sk_user_data(sock, psock);
......
...@@ -1855,7 +1855,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, ...@@ -1855,7 +1855,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
/* If user passes invalid input drop the packet. */ /* If user passes invalid input drop the packet. */
if (unlikely(flags)) if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP; return SK_DROP;
tcb->bpf.key = key; tcb->bpf.key = key;
...@@ -1894,7 +1894,7 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg, ...@@ -1894,7 +1894,7 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
struct bpf_map *, map, u32, key, u64, flags) struct bpf_map *, map, u32, key, u64, flags)
{ {
/* If user passes invalid input drop the packet. */ /* If user passes invalid input drop the packet. */
if (unlikely(flags)) if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP; return SK_DROP;
msg->key = key; msg->key = key;
......
...@@ -485,6 +485,14 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) ...@@ -485,6 +485,14 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
} }
} }
static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
int target, struct sock *sk)
{
return (tp->rcv_nxt - tp->copied_seq >= target) ||
(sk->sk_prot->stream_memory_read ?
sk->sk_prot->stream_memory_read(sk) : false);
}
/* /*
* Wait for a TCP event. * Wait for a TCP event.
* *
...@@ -554,7 +562,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) ...@@ -554,7 +562,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
tp->urg_data) tp->urg_data)
target++; target++;
if (tp->rcv_nxt - tp->copied_seq >= target) if (tcp_stream_is_readable(tp, target, sk))
mask |= EPOLLIN | EPOLLRDNORM; mask |= EPOLLIN | EPOLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
......
...@@ -54,7 +54,7 @@ struct bpf_map_def SEC("maps") sock_map_redir = { ...@@ -54,7 +54,7 @@ struct bpf_map_def SEC("maps") sock_map_redir = {
.type = BPF_MAP_TYPE_SOCKMAP, .type = BPF_MAP_TYPE_SOCKMAP,
.key_size = sizeof(int), .key_size = sizeof(int),
.value_size = sizeof(int), .value_size = sizeof(int),
.max_entries = 1, .max_entries = 20,
}; };
struct bpf_map_def SEC("maps") sock_apply_bytes = { struct bpf_map_def SEC("maps") sock_apply_bytes = {
...@@ -78,6 +78,19 @@ struct bpf_map_def SEC("maps") sock_pull_bytes = { ...@@ -78,6 +78,19 @@ struct bpf_map_def SEC("maps") sock_pull_bytes = {
.max_entries = 2 .max_entries = 2
}; };
struct bpf_map_def SEC("maps") sock_redir_flags = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1
};
struct bpf_map_def SEC("maps") sock_skb_opts = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1
};
SEC("sk_skb1") SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb) int bpf_prog1(struct __sk_buff *skb)
...@@ -90,15 +103,24 @@ int bpf_prog2(struct __sk_buff *skb) ...@@ -90,15 +103,24 @@ int bpf_prog2(struct __sk_buff *skb)
{ {
__u32 lport = skb->local_port; __u32 lport = skb->local_port;
__u32 rport = skb->remote_port; __u32 rport = skb->remote_port;
int ret = 0; int len, *f, ret, zero = 0;
__u64 flags = 0;
if (lport == 10000) if (lport == 10000)
ret = 10; ret = 10;
else else
ret = 1; ret = 1;
bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret); len = (__u32)skb->data_end - (__u32)skb->data;
return bpf_sk_redirect_map(skb, &sock_map, ret, 0); f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
if (f && *f) {
ret = 3;
flags = *f;
}
bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
len, flags);
return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
} }
SEC("sockops") SEC("sockops")
...@@ -197,8 +219,9 @@ int bpf_prog5(struct sk_msg_md *msg) ...@@ -197,8 +219,9 @@ int bpf_prog5(struct sk_msg_md *msg)
SEC("sk_msg3") SEC("sk_msg3")
int bpf_prog6(struct sk_msg_md *msg) int bpf_prog6(struct sk_msg_md *msg)
{ {
int *bytes, zero = 0, one = 1; int *bytes, zero = 0, one = 1, key = 0;
int *start, *end; int *start, *end, *f;
__u64 flags = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes) if (bytes)
...@@ -210,15 +233,22 @@ int bpf_prog6(struct sk_msg_md *msg) ...@@ -210,15 +233,22 @@ int bpf_prog6(struct sk_msg_md *msg)
end = bpf_map_lookup_elem(&sock_pull_bytes, &one); end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
if (start && end) if (start && end)
bpf_msg_pull_data(msg, *start, *end, 0); bpf_msg_pull_data(msg, *start, *end, 0);
return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0); f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
if (f && *f) {
key = 2;
flags = *f;
}
return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
} }
SEC("sk_msg4") SEC("sk_msg4")
int bpf_prog7(struct sk_msg_md *msg) int bpf_prog7(struct sk_msg_md *msg)
{ {
int err1 = 0, err2 = 0, zero = 0, one = 1; int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
int *bytes, *start, *end, len1, len2; int *f, *bytes, *start, *end, len1, len2;
__u64 flags = 0;
int err;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes) if (bytes)
err1 = bpf_msg_apply_bytes(msg, *bytes); err1 = bpf_msg_apply_bytes(msg, *bytes);
...@@ -229,7 +259,6 @@ int bpf_prog7(struct sk_msg_md *msg) ...@@ -229,7 +259,6 @@ int bpf_prog7(struct sk_msg_md *msg)
start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
end = bpf_map_lookup_elem(&sock_pull_bytes, &one); end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
if (start && end) { if (start && end) {
int err;
bpf_printk("sk_msg2: pull(%i:%i)\n", bpf_printk("sk_msg2: pull(%i:%i)\n",
start ? *start : 0, end ? *end : 0); start ? *start : 0, end ? *end : 0);
...@@ -241,9 +270,16 @@ int bpf_prog7(struct sk_msg_md *msg) ...@@ -241,9 +270,16 @@ int bpf_prog7(struct sk_msg_md *msg)
bpf_printk("sk_msg2: length update %i->%i\n", bpf_printk("sk_msg2: length update %i->%i\n",
len1, len2); len1, len2);
} }
bpf_printk("sk_msg3: redirect(%iB) err1=%i err2=%i\n", f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
len1, err1, err2); if (f && *f) {
return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0); key = 2;
flags = *f;
}
bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
len1, flags, err1 ? err1 : err2);
err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
bpf_printk("sk_msg3: err %i\n", err);
return err;
} }
SEC("sk_msg5") SEC("sk_msg5")
......
#Test a bunch of positive cases to verify basic functionality #Test a bunch of positive cases to verify basic functionality
for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do for prog in "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
for t in "sendmsg" "sendpage"; do for t in "sendmsg" "sendpage"; do
for r in 1 10 100; do for r in 1 10 100; do
for i in 1 10 100; do for i in 1 10 100; do
...@@ -100,6 +100,25 @@ for t in "sendmsg" "sendpage"; do ...@@ -100,6 +100,25 @@ for t in "sendmsg" "sendpage"; do
sleep 2 sleep 2
done done
prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
# Test apply and redirect with larger value than send # Test apply and redirect with larger value than send
r=1 r=1
i=8 i=8
...@@ -113,6 +132,25 @@ for t in "sendmsg" "sendpage"; do ...@@ -113,6 +132,25 @@ for t in "sendmsg" "sendpage"; do
sleep 2 sleep 2
done done
prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
# Test apply and redirect with apply that never reaches limit # Test apply and redirect with apply that never reaches limit
r=1024 r=1024
i=1 i=1
......
...@@ -64,6 +64,8 @@ int txmsg_apply; ...@@ -64,6 +64,8 @@ int txmsg_apply;
int txmsg_cork; int txmsg_cork;
int txmsg_start; int txmsg_start;
int txmsg_end; int txmsg_end;
int txmsg_ingress;
int txmsg_skb;
static const struct option long_options[] = { static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' }, {"help", no_argument, NULL, 'h' },
...@@ -83,6 +85,8 @@ static const struct option long_options[] = { ...@@ -83,6 +85,8 @@ static const struct option long_options[] = {
{"txmsg_cork", required_argument, NULL, 'k'}, {"txmsg_cork", required_argument, NULL, 'k'},
{"txmsg_start", required_argument, NULL, 's'}, {"txmsg_start", required_argument, NULL, 's'},
{"txmsg_end", required_argument, NULL, 'e'}, {"txmsg_end", required_argument, NULL, 'e'},
{"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
{"txmsg_skb", no_argument, &txmsg_skb, 1 },
{0, 0, NULL, 0 } {0, 0, NULL, 0 }
}; };
...@@ -793,6 +797,60 @@ int main(int argc, char **argv) ...@@ -793,6 +797,60 @@ int main(int argc, char **argv)
return err; return err;
} }
} }
if (txmsg_ingress) {
int in = BPF_F_INGRESS;
i = 0;
err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
err, strerror(errno));
}
i = 1;
err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
err, strerror(errno));
}
err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
err, strerror(errno));
}
i = 2;
err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
err, strerror(errno));
}
}
if (txmsg_skb) {
int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
int ingress = BPF_F_INGRESS;
i = 0;
err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
err, strerror(errno));
}
i = 3;
err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
err, strerror(errno));
}
}
} }
if (txmsg_drop) if (txmsg_drop)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册