提交 36dbe571 编写于 作者: D Daniel Borkmann

Merge branch 'bpf-sk-msg-pop-data'

John Fastabend says:

====================
After being able to add metadata to messages with sk_msg_push_data we
have also found it useful to be able to "pop" this metadata off before
sending it to applications in some cases. This series adds a new helper
sk_msg_pop_data() and the associated patches to add tests and tools/lib
support.

Thanks!

v2: Daniel caught that we missed adding sk_msg_pop_data to the changes
    data helper so that the verifier ensures BPF programs revalidate
    data after using this helper. Also improve documentation adding a
    return description and using RST syntax per Quentin's comment. And
    delta calculations for DROP with pop'd data (albeit a strange set
    of operations for a program to be doing) had potential to be
    incorrect possibly confusing user space applications, so fix it.
====================
Acked-by: NAlexei Starovoitov <ast@kernel.org>
Signed-off-by: NDaniel Borkmann <daniel@iogearbox.net>
...@@ -2268,6 +2268,19 @@ union bpf_attr { ...@@ -2268,6 +2268,19 @@ union bpf_attr {
* *
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
*
* int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
* Description
* Will remove *pop* bytes from a *msg* starting at byte *start*.
* This may result in **ENOMEM** errors under certain situations if
* an allocation and copy are required due to a full ring buffer.
* However, the helper will try to avoid doing the allocation
* if possible. Other errors can occur if input parameters are
* invalid either due to *start* byte not being valid part of msg
* payload and/or *pop* value being to large.
*
* Return
* 0 on success, or a negative erro in case of failure.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -2360,7 +2373,8 @@ union bpf_attr { ...@@ -2360,7 +2373,8 @@ union bpf_attr {
FN(map_push_elem), \ FN(map_push_elem), \
FN(map_pop_elem), \ FN(map_pop_elem), \
FN(map_peek_elem), \ FN(map_peek_elem), \
FN(msg_push_data), FN(msg_push_data), \
FN(msg_pop_data),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -2425,6 +2425,174 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = { ...@@ -2425,6 +2425,174 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {
.arg4_type = ARG_ANYTHING, .arg4_type = ARG_ANYTHING,
}; };
static void sk_msg_shift_left(struct sk_msg *msg, int i)
{
int prev;
do {
prev = i;
sk_msg_iter_var_next(i);
msg->sg.data[prev] = msg->sg.data[i];
} while (i != msg->sg.end);
sk_msg_iter_prev(msg, end);
}
static void sk_msg_shift_right(struct sk_msg *msg, int i)
{
struct scatterlist tmp, sge;
sk_msg_iter_next(msg, end);
sge = sk_msg_elem_cpy(msg, i);
sk_msg_iter_var_next(i);
tmp = sk_msg_elem_cpy(msg, i);
while (i != msg->sg.end) {
msg->sg.data[i] = sge;
sk_msg_iter_var_next(i);
sge = tmp;
tmp = sk_msg_elem_cpy(msg, i);
}
}
BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
u32, len, u64, flags)
{
u32 i = 0, l, space, offset = 0;
u64 last = start + len;
int pop;
if (unlikely(flags))
return -EINVAL;
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
l = sk_msg_elem(msg, i)->length;
if (start < offset + l)
break;
offset += l;
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
/* Bounds checks: start and pop must be inside message */
if (start >= offset + l || last >= msg->sg.size)
return -EINVAL;
space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
pop = len;
/* --------------| offset
* -| start |-------- len -------|
*
* |----- a ----|-------- pop -------|----- b ----|
* |______________________________________________| length
*
*
* a: region at front of scatter element to save
* b: region at back of scatter element to save when length > A + pop
* pop: region to pop from element, same as input 'pop' here will be
* decremented below per iteration.
*
* Two top-level cases to handle when start != offset, first B is non
* zero and second B is zero corresponding to when a pop includes more
* than one element.
*
* Then if B is non-zero AND there is no space allocate space and
* compact A, B regions into page. If there is space shift ring to
* the rigth free'ing the next element in ring to place B, leaving
* A untouched except to reduce length.
*/
if (start != offset) {
struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
int a = start;
int b = sge->length - pop - a;
sk_msg_iter_var_next(i);
if (pop < sge->length - a) {
if (space) {
sge->length = a;
sk_msg_shift_right(msg, i);
nsge = sk_msg_elem(msg, i);
get_page(sg_page(sge));
sg_set_page(nsge,
sg_page(sge),
b, sge->offset + pop + a);
} else {
struct page *page, *orig;
u8 *to, *from;
page = alloc_pages(__GFP_NOWARN |
__GFP_COMP | GFP_ATOMIC,
get_order(a + b));
if (unlikely(!page))
return -ENOMEM;
sge->length = a;
orig = sg_page(sge);
from = sg_virt(sge);
to = page_address(page);
memcpy(to, from, a);
memcpy(to + a, from + a + pop, b);
sg_set_page(sge, page, a + b, 0);
put_page(orig);
}
pop = 0;
} else if (pop >= sge->length - a) {
sge->length = a;
pop -= (sge->length - a);
}
}
/* From above the current layout _must_ be as follows,
*
* -| offset
* -| start
*
* |---- pop ---|---------------- b ------------|
* |____________________________________________| length
*
* Offset and start of the current msg elem are equal because in the
* previous case we handled offset != start and either consumed the
* entire element and advanced to the next element OR pop == 0.
*
* Two cases to handle here are first pop is less than the length
* leaving some remainder b above. Simply adjust the element's layout
* in this case. Or pop >= length of the element so that b = 0. In this
* case advance to next element decrementing pop.
*/
while (pop) {
struct scatterlist *sge = sk_msg_elem(msg, i);
if (pop < sge->length) {
sge->length -= pop;
sge->offset += pop;
pop = 0;
} else {
pop -= sge->length;
sk_msg_shift_left(msg, i);
}
sk_msg_iter_var_next(i);
}
sk_mem_uncharge(msg->sk, len - pop);
msg->sg.size -= (len - pop);
sk_msg_compute_data_pointers(msg);
return 0;
}
static const struct bpf_func_proto bpf_msg_pop_data_proto = {
.func = bpf_msg_pop_data,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_ANYTHING,
};
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{ {
return task_get_classid(skb); return task_get_classid(skb);
...@@ -5098,6 +5266,7 @@ bool bpf_helper_changes_pkt_data(void *func) ...@@ -5098,6 +5266,7 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_xdp_adjust_meta || func == bpf_xdp_adjust_meta ||
func == bpf_msg_pull_data || func == bpf_msg_pull_data ||
func == bpf_msg_push_data || func == bpf_msg_push_data ||
func == bpf_msg_pop_data ||
func == bpf_xdp_adjust_tail || func == bpf_xdp_adjust_tail ||
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
func == bpf_lwt_seg6_store_bytes || func == bpf_lwt_seg6_store_bytes ||
...@@ -5394,6 +5563,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -5394,6 +5563,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_msg_pull_data_proto; return &bpf_msg_pull_data_proto;
case BPF_FUNC_msg_push_data: case BPF_FUNC_msg_push_data:
return &bpf_msg_push_data_proto; return &bpf_msg_push_data_proto;
case BPF_FUNC_msg_pop_data:
return &bpf_msg_pop_data_proto;
default: default:
return bpf_base_func_proto(func_id); return bpf_base_func_proto(func_id);
} }
......
...@@ -289,12 +289,23 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, ...@@ -289,12 +289,23 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
{ {
bool cork = false, enospc = msg->sg.start == msg->sg.end; bool cork = false, enospc = msg->sg.start == msg->sg.end;
struct sock *sk_redir; struct sock *sk_redir;
u32 tosend; u32 tosend, delta = 0;
int ret; int ret;
more_data: more_data:
if (psock->eval == __SK_NONE) if (psock->eval == __SK_NONE) {
/* Track delta in msg size to add/subtract it on SK_DROP from
* returned to user copied size. This ensures user doesn't
* get a positive return code with msg_cut_data and SK_DROP
* verdict.
*/
delta = msg->sg.size;
psock->eval = sk_psock_msg_verdict(sk, psock, msg); psock->eval = sk_psock_msg_verdict(sk, psock, msg);
if (msg->sg.size < delta)
delta -= msg->sg.size;
else
delta = 0;
}
if (msg->cork_bytes && if (msg->cork_bytes &&
msg->cork_bytes > msg->sg.size && !enospc) { msg->cork_bytes > msg->sg.size && !enospc) {
...@@ -350,7 +361,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, ...@@ -350,7 +361,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
default: default:
sk_msg_free_partial(sk, msg, tosend); sk_msg_free_partial(sk, msg, tosend);
sk_msg_apply_bytes(psock, tosend); sk_msg_apply_bytes(psock, tosend);
*copied -= tosend; *copied -= (tosend + delta);
return -EACCES; return -EACCES;
} }
......
...@@ -687,6 +687,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, ...@@ -687,6 +687,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
struct sock *sk_redir; struct sock *sk_redir;
struct tls_rec *rec; struct tls_rec *rec;
int err = 0, send; int err = 0, send;
u32 delta = 0;
bool enospc; bool enospc;
psock = sk_psock_get(sk); psock = sk_psock_get(sk);
...@@ -694,8 +695,14 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, ...@@ -694,8 +695,14 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
return tls_push_record(sk, flags, record_type); return tls_push_record(sk, flags, record_type);
more_data: more_data:
enospc = sk_msg_full(msg); enospc = sk_msg_full(msg);
if (psock->eval == __SK_NONE) if (psock->eval == __SK_NONE) {
delta = msg->sg.size;
psock->eval = sk_psock_msg_verdict(sk, psock, msg); psock->eval = sk_psock_msg_verdict(sk, psock, msg);
if (delta < msg->sg.size)
delta -= msg->sg.size;
else
delta = 0;
}
if (msg->cork_bytes && msg->cork_bytes > msg->sg.size && if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
!enospc && !full_record) { !enospc && !full_record) {
err = -ENOSPC; err = -ENOSPC;
...@@ -743,7 +750,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, ...@@ -743,7 +750,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
msg->apply_bytes -= send; msg->apply_bytes -= send;
if (msg->sg.size == 0) if (msg->sg.size == 0)
tls_free_open_rec(sk); tls_free_open_rec(sk);
*copied -= send; *copied -= (send + delta);
err = -EACCES; err = -EACCES;
} }
......
...@@ -2268,6 +2268,19 @@ union bpf_attr { ...@@ -2268,6 +2268,19 @@ union bpf_attr {
* *
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
*
* int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
* Description
* Will remove *pop* bytes from a *msg* starting at byte *start*.
* This may result in **ENOMEM** errors under certain situations if
* an allocation and copy are required due to a full ring buffer.
* However, the helper will try to avoid doing the allocation
* if possible. Other errors can occur if input parameters are
* invalid either due to *start* byte not being valid part of msg
* payload and/or *pop* value being to large.
*
* Return
* 0 on success, or a negative erro in case of failure.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -2360,7 +2373,8 @@ union bpf_attr { ...@@ -2360,7 +2373,8 @@ union bpf_attr {
FN(map_push_elem), \ FN(map_push_elem), \
FN(map_pop_elem), \ FN(map_pop_elem), \
FN(map_peek_elem), \ FN(map_peek_elem), \
FN(msg_push_data), FN(msg_push_data), \
FN(msg_pop_data),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -113,6 +113,8 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = ...@@ -113,6 +113,8 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
(void *) BPF_FUNC_msg_pull_data; (void *) BPF_FUNC_msg_pull_data;
static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) =
(void *) BPF_FUNC_msg_push_data; (void *) BPF_FUNC_msg_push_data;
static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) =
(void *) BPF_FUNC_msg_pop_data;
static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
(void *) BPF_FUNC_bind; (void *) BPF_FUNC_bind;
static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
......
...@@ -79,6 +79,8 @@ int txmsg_start; ...@@ -79,6 +79,8 @@ int txmsg_start;
int txmsg_end; int txmsg_end;
int txmsg_start_push; int txmsg_start_push;
int txmsg_end_push; int txmsg_end_push;
int txmsg_start_pop;
int txmsg_pop;
int txmsg_ingress; int txmsg_ingress;
int txmsg_skb; int txmsg_skb;
int ktls; int ktls;
...@@ -104,6 +106,8 @@ static const struct option long_options[] = { ...@@ -104,6 +106,8 @@ static const struct option long_options[] = {
{"txmsg_end", required_argument, NULL, 'e'}, {"txmsg_end", required_argument, NULL, 'e'},
{"txmsg_start_push", required_argument, NULL, 'p'}, {"txmsg_start_push", required_argument, NULL, 'p'},
{"txmsg_end_push", required_argument, NULL, 'q'}, {"txmsg_end_push", required_argument, NULL, 'q'},
{"txmsg_start_pop", required_argument, NULL, 'w'},
{"txmsg_pop", required_argument, NULL, 'x'},
{"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, {"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
{"txmsg_skb", no_argument, &txmsg_skb, 1 }, {"txmsg_skb", no_argument, &txmsg_skb, 1 },
{"ktls", no_argument, &ktls, 1 }, {"ktls", no_argument, &ktls, 1 },
...@@ -473,13 +477,27 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, ...@@ -473,13 +477,27 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
clock_gettime(CLOCK_MONOTONIC, &s->end); clock_gettime(CLOCK_MONOTONIC, &s->end);
} else { } else {
int slct, recvp = 0, recv, max_fd = fd; int slct, recvp = 0, recv, max_fd = fd;
float total_bytes, txmsg_pop_total;
int fd_flags = O_NONBLOCK; int fd_flags = O_NONBLOCK;
struct timeval timeout; struct timeval timeout;
float total_bytes;
fd_set w; fd_set w;
fcntl(fd, fd_flags); fcntl(fd, fd_flags);
/* Account for pop bytes noting each iteration of apply will
* call msg_pop_data helper so we need to account for this
* by calculating the number of apply iterations. Note user
* of the tool can create cases where no data is sent by
* manipulating pop/push/pull/etc. For example txmsg_apply 1
* with txmsg_pop 1 will try to apply 1B at a time but each
* iteration will then pop 1B so no data will ever be sent.
* This is really only useful for testing edge cases in code
* paths.
*/
total_bytes = (float)iov_count * (float)iov_length * (float)cnt; total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
txmsg_pop_total = txmsg_pop;
if (txmsg_apply)
txmsg_pop_total *= (total_bytes / txmsg_apply);
total_bytes -= txmsg_pop_total;
err = clock_gettime(CLOCK_MONOTONIC, &s->start); err = clock_gettime(CLOCK_MONOTONIC, &s->start);
if (err < 0) if (err < 0)
perror("recv start time: "); perror("recv start time: ");
...@@ -488,7 +506,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, ...@@ -488,7 +506,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
timeout.tv_sec = 0; timeout.tv_sec = 0;
timeout.tv_usec = 300000; timeout.tv_usec = 300000;
} else { } else {
timeout.tv_sec = 1; timeout.tv_sec = 3;
timeout.tv_usec = 0; timeout.tv_usec = 0;
} }
...@@ -503,7 +521,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, ...@@ -503,7 +521,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
goto out_errno; goto out_errno;
} else if (!slct) { } else if (!slct) {
if (opt->verbose) if (opt->verbose)
fprintf(stderr, "unexpected timeout\n"); fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total);
errno = -EIO; errno = -EIO;
clock_gettime(CLOCK_MONOTONIC, &s->end); clock_gettime(CLOCK_MONOTONIC, &s->end);
goto out_errno; goto out_errno;
...@@ -619,7 +637,7 @@ static int sendmsg_test(struct sockmap_options *opt) ...@@ -619,7 +637,7 @@ static int sendmsg_test(struct sockmap_options *opt)
iov_count = 1; iov_count = 1;
err = msg_loop(rx_fd, iov_count, iov_buf, err = msg_loop(rx_fd, iov_count, iov_buf,
cnt, &s, false, opt); cnt, &s, false, opt);
if (err && opt->verbose) if (opt->verbose)
fprintf(stderr, fprintf(stderr,
"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n", "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
iov_count, iov_buf, cnt, err); iov_count, iov_buf, cnt, err);
...@@ -931,6 +949,39 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) ...@@ -931,6 +949,39 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
} }
} }
if (txmsg_start_pop) {
i = 4;
err = bpf_map_update_elem(map_fd[5],
&i, &txmsg_start_pop, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop): %d (%s)\n",
txmsg_start_pop, i, err, strerror(errno));
goto out;
}
} else {
i = 4;
bpf_map_update_elem(map_fd[5],
&i, &txmsg_start_pop, BPF_ANY);
}
if (txmsg_pop) {
i = 5;
err = bpf_map_update_elem(map_fd[5],
&i, &txmsg_pop, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem %i@%i (txmsg_pop): %d (%s)\n",
txmsg_pop, i, err, strerror(errno));
goto out;
}
} else {
i = 5;
bpf_map_update_elem(map_fd[5],
&i, &txmsg_pop, BPF_ANY);
}
if (txmsg_ingress) { if (txmsg_ingress) {
int in = BPF_F_INGRESS; int in = BPF_F_INGRESS;
...@@ -1082,6 +1133,11 @@ static void test_options(char *options) ...@@ -1082,6 +1133,11 @@ static void test_options(char *options)
snprintf(tstr, OPTSTRING, "end %d,", txmsg_end); snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
strncat(options, tstr, OPTSTRING); strncat(options, tstr, OPTSTRING);
} }
if (txmsg_start_pop) {
snprintf(tstr, OPTSTRING, "pop (%d,%d),",
txmsg_start_pop, txmsg_start_pop + txmsg_pop);
strncat(options, tstr, OPTSTRING);
}
if (txmsg_ingress) if (txmsg_ingress)
strncat(options, "ingress,", OPTSTRING); strncat(options, "ingress,", OPTSTRING);
if (txmsg_skb) if (txmsg_skb)
...@@ -1264,6 +1320,7 @@ static int test_mixed(int cgrp) ...@@ -1264,6 +1320,7 @@ static int test_mixed(int cgrp)
txmsg_apply = txmsg_cork = 0; txmsg_apply = txmsg_cork = 0;
txmsg_start = txmsg_end = 0; txmsg_start = txmsg_end = 0;
txmsg_start_push = txmsg_end_push = 0; txmsg_start_push = txmsg_end_push = 0;
txmsg_start_pop = txmsg_pop = 0;
/* Test small and large iov_count values with pass/redir/apply/cork */ /* Test small and large iov_count values with pass/redir/apply/cork */
txmsg_pass = 1; txmsg_pass = 1;
...@@ -1383,6 +1440,19 @@ static int test_start_end(int cgrp) ...@@ -1383,6 +1440,19 @@ static int test_start_end(int cgrp)
txmsg_end = 2; txmsg_end = 2;
txmsg_start_push = 1; txmsg_start_push = 1;
txmsg_end_push = 2; txmsg_end_push = 2;
txmsg_start_pop = 1;
txmsg_pop = 1;
err = test_txmsg(cgrp);
if (err)
goto out;
/* Cut a byte of pushed data but leave reamining in place */
txmsg_start = 1;
txmsg_end = 2;
txmsg_start_push = 1;
txmsg_end_push = 3;
txmsg_start_pop = 1;
txmsg_pop = 1;
err = test_txmsg(cgrp); err = test_txmsg(cgrp);
if (err) if (err)
goto out; goto out;
...@@ -1393,6 +1463,9 @@ static int test_start_end(int cgrp) ...@@ -1393,6 +1463,9 @@ static int test_start_end(int cgrp)
opt.iov_length = 100; opt.iov_length = 100;
txmsg_cork = 1600; txmsg_cork = 1600;
txmsg_start_pop = 0;
txmsg_pop = 0;
for (i = 99; i <= 1600; i += 500) { for (i = 99; i <= 1600; i += 500) {
txmsg_start = 0; txmsg_start = 0;
txmsg_end = i; txmsg_end = i;
...@@ -1403,6 +1476,17 @@ static int test_start_end(int cgrp) ...@@ -1403,6 +1476,17 @@ static int test_start_end(int cgrp)
goto out; goto out;
} }
/* Test pop data in middle of cork */
for (i = 99; i <= 1600; i += 500) {
txmsg_start_pop = 10;
txmsg_pop = i;
err = test_exec(cgrp, &opt);
if (err)
goto out;
}
txmsg_start_pop = 0;
txmsg_pop = 0;
/* Test start/end with cork but pull data in middle */ /* Test start/end with cork but pull data in middle */
for (i = 199; i <= 1600; i += 500) { for (i = 199; i <= 1600; i += 500) {
txmsg_start = 100; txmsg_start = 100;
...@@ -1423,6 +1507,15 @@ static int test_start_end(int cgrp) ...@@ -1423,6 +1507,15 @@ static int test_start_end(int cgrp)
if (err) if (err)
goto out; goto out;
/* Test pop with cork pulling last sg entry */
txmsg_start_pop = 1500;
txmsg_pop = 1600;
err = test_exec(cgrp, &opt);
if (err)
goto out;
txmsg_start_pop = 0;
txmsg_pop = 0;
/* Test start/end pull of single byte in last page */ /* Test start/end pull of single byte in last page */
txmsg_start = 1111; txmsg_start = 1111;
txmsg_end = 1112; txmsg_end = 1112;
...@@ -1432,6 +1525,13 @@ static int test_start_end(int cgrp) ...@@ -1432,6 +1525,13 @@ static int test_start_end(int cgrp)
if (err) if (err)
goto out; goto out;
/* Test pop of single byte in last page */
txmsg_start_pop = 1111;
txmsg_pop = 1112;
err = test_exec(cgrp, &opt);
if (err)
goto out;
/* Test start/end with end < start */ /* Test start/end with end < start */
txmsg_start = 1111; txmsg_start = 1111;
txmsg_end = 0; txmsg_end = 0;
...@@ -1456,7 +1556,20 @@ static int test_start_end(int cgrp) ...@@ -1456,7 +1556,20 @@ static int test_start_end(int cgrp)
txmsg_start_push = 1601; txmsg_start_push = 1601;
txmsg_end_push = 1600; txmsg_end_push = 1600;
err = test_exec(cgrp, &opt); err = test_exec(cgrp, &opt);
if (err)
goto out;
/* Test pop with start > data */
txmsg_start_pop = 1601;
txmsg_pop = 1;
err = test_exec(cgrp, &opt);
if (err)
goto out;
/* Test pop with pop range > data */
txmsg_start_pop = 1599;
txmsg_pop = 10;
err = test_exec(cgrp, &opt);
out: out:
txmsg_start = 0; txmsg_start = 0;
txmsg_end = 0; txmsg_end = 0;
...@@ -1641,6 +1754,12 @@ int main(int argc, char **argv) ...@@ -1641,6 +1754,12 @@ int main(int argc, char **argv)
case 'q': case 'q':
txmsg_end_push = atoi(optarg); txmsg_end_push = atoi(optarg);
break; break;
case 'w':
txmsg_start_pop = atoi(optarg);
break;
case 'x':
txmsg_pop = atoi(optarg);
break;
case 'a': case 'a':
txmsg_apply = atoi(optarg); txmsg_apply = atoi(optarg);
break; break;
......
...@@ -74,7 +74,7 @@ struct bpf_map_def SEC("maps") sock_bytes = { ...@@ -74,7 +74,7 @@ struct bpf_map_def SEC("maps") sock_bytes = {
.type = BPF_MAP_TYPE_ARRAY, .type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int), .key_size = sizeof(int),
.value_size = sizeof(int), .value_size = sizeof(int),
.max_entries = 4 .max_entries = 6
}; };
struct bpf_map_def SEC("maps") sock_redir_flags = { struct bpf_map_def SEC("maps") sock_redir_flags = {
...@@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops) ...@@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
SEC("sk_msg1") SEC("sk_msg1")
int bpf_prog4(struct sk_msg_md *msg) int bpf_prog4(struct sk_msg_md *msg)
{ {
int *bytes, zero = 0, one = 1, two = 2, three = 3; int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
int *start, *end, *start_push, *end_push; int *start, *end, *start_push, *end_push, *start_pop, *pop;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes) if (bytes)
...@@ -198,15 +198,19 @@ int bpf_prog4(struct sk_msg_md *msg) ...@@ -198,15 +198,19 @@ int bpf_prog4(struct sk_msg_md *msg)
end_push = bpf_map_lookup_elem(&sock_bytes, &three); end_push = bpf_map_lookup_elem(&sock_bytes, &three);
if (start_push && end_push) if (start_push && end_push)
bpf_msg_push_data(msg, *start_push, *end_push, 0); bpf_msg_push_data(msg, *start_push, *end_push, 0);
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop)
bpf_msg_pop_data(msg, *start_pop, *pop, 0);
return SK_PASS; return SK_PASS;
} }
SEC("sk_msg2") SEC("sk_msg2")
int bpf_prog5(struct sk_msg_md *msg) int bpf_prog5(struct sk_msg_md *msg)
{ {
int zero = 0, one = 1, two = 2, three = 3; int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
int *start, *end, *start_push, *end_push; int *start, *end, *start_push, *end_push, *start_pop, *pop;
int *bytes, len1, len2 = 0, len3; int *bytes, len1, len2 = 0, len3, len4;
int err1 = -1, err2 = -1; int err1 = -1, err2 = -1;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
...@@ -247,6 +251,20 @@ int bpf_prog5(struct sk_msg_md *msg) ...@@ -247,6 +251,20 @@ int bpf_prog5(struct sk_msg_md *msg)
bpf_printk("sk_msg2: length push_update %i->%i\n", bpf_printk("sk_msg2: length push_update %i->%i\n",
len2 ? len2 : len1, len3); len2 ? len2 : len1, len3);
} }
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop) {
int err;
bpf_printk("sk_msg2: pop(%i@%i)\n",
start_pop, pop);
err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
if (err)
bpf_printk("sk_msg2: pop_data err %i\n", err);
len4 = (__u64)msg->data_end - (__u64)msg->data;
bpf_printk("sk_msg2: length pop_data %i->%i\n",
len1 ? len1 : 0, len4);
}
bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
len1, err1, err2); len1, err1, err2);
...@@ -256,8 +274,8 @@ int bpf_prog5(struct sk_msg_md *msg) ...@@ -256,8 +274,8 @@ int bpf_prog5(struct sk_msg_md *msg)
SEC("sk_msg3") SEC("sk_msg3")
int bpf_prog6(struct sk_msg_md *msg) int bpf_prog6(struct sk_msg_md *msg)
{ {
int *bytes, *start, *end, *start_push, *end_push, *f; int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
int zero = 0, one = 1, two = 2, three = 3, key = 0; int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
__u64 flags = 0; __u64 flags = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
...@@ -277,6 +295,11 @@ int bpf_prog6(struct sk_msg_md *msg) ...@@ -277,6 +295,11 @@ int bpf_prog6(struct sk_msg_md *msg)
if (start_push && end_push) if (start_push && end_push)
bpf_msg_push_data(msg, *start_push, *end_push, 0); bpf_msg_push_data(msg, *start_push, *end_push, 0);
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop)
bpf_msg_pop_data(msg, *start_pop, *pop, 0);
f = bpf_map_lookup_elem(&sock_redir_flags, &zero); f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
if (f && *f) { if (f && *f) {
key = 2; key = 2;
...@@ -292,8 +315,9 @@ int bpf_prog6(struct sk_msg_md *msg) ...@@ -292,8 +315,9 @@ int bpf_prog6(struct sk_msg_md *msg)
SEC("sk_msg4") SEC("sk_msg4")
int bpf_prog7(struct sk_msg_md *msg) int bpf_prog7(struct sk_msg_md *msg)
{ {
int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3; int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
int *bytes, *start, *end, *start_push, *end_push, *f; int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
int len1, len2 = 0, len3, len4;
int err1 = 0, err2 = 0, key = 0; int err1 = 0, err2 = 0, key = 0;
__u64 flags = 0; __u64 flags = 0;
...@@ -335,6 +359,22 @@ int bpf_prog7(struct sk_msg_md *msg) ...@@ -335,6 +359,22 @@ int bpf_prog7(struct sk_msg_md *msg)
len2 ? len2 : len1, len3); len2 ? len2 : len1, len3);
} }
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop) {
int err;
bpf_printk("sk_msg4: pop(%i@%i)\n",
start_pop, pop);
err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
if (err)
bpf_printk("sk_msg4: pop_data err %i\n", err);
len4 = (__u64)msg->data_end - (__u64)msg->data;
bpf_printk("sk_msg4: length pop_data %i->%i\n",
len1 ? len1 : 0, len4);
}
f = bpf_map_lookup_elem(&sock_redir_flags, &zero); f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
if (f && *f) { if (f && *f) {
key = 2; key = 2;
...@@ -389,8 +429,8 @@ int bpf_prog9(struct sk_msg_md *msg) ...@@ -389,8 +429,8 @@ int bpf_prog9(struct sk_msg_md *msg)
SEC("sk_msg7") SEC("sk_msg7")
int bpf_prog10(struct sk_msg_md *msg) int bpf_prog10(struct sk_msg_md *msg)
{ {
int *bytes, *start, *end, *start_push, *end_push; int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
int zero = 0, one = 1, two = 2, three = 3; int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes) if (bytes)
...@@ -406,7 +446,11 @@ int bpf_prog10(struct sk_msg_md *msg) ...@@ -406,7 +446,11 @@ int bpf_prog10(struct sk_msg_md *msg)
end_push = bpf_map_lookup_elem(&sock_bytes, &three); end_push = bpf_map_lookup_elem(&sock_bytes, &three);
if (start_push && end_push) if (start_push && end_push)
bpf_msg_push_data(msg, *start_push, *end_push, 0); bpf_msg_push_data(msg, *start_push, *end_push, 0);
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop)
bpf_msg_pop_data(msg, *start_pop, *pop, 0);
bpf_printk("return sk drop\n");
return SK_DROP; return SK_DROP;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册