Commit b4ff7ad7 authored by Alexei Starovoitov

Merge branch 'libbpf-support-bpf-to-bpf-calls'

Andrii Nakryiko says:

====================
Currently, libbpf supports a limited form of BPF-to-BPF subprogram calls. The
restriction is that an entry-point BPF program must use *all* of the
subprograms defined in the BPF .o file. If any subprogram goes unused, such an
entry-point BPF program will be rejected by the verifier as containing
unreachable dead code. This is not a big limitation for cases with a single
entry-point BPF program, but it is quite a heavy restriction for multiple
programs that use only partially overlapping sets of subprograms.
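
To make the failure mode concrete, here is a minimal sketch (all names are
hypothetical) of a .o file the pre-patch scheme could not handle: prog_a uses
only one of the two subprograms, so loading it with the whole .text appended
would leave sub_b behind as unreachable dead code.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#ifndef __noinline
#define __noinline __attribute__((noinline))
#endif

/* shared by both entry points */
static __noinline int sub_shared(int x)
{
	return x + 1;
}

/* used only by prog_b */
static __noinline int sub_b(int x)
{
	return x * 2;
}

SEC("raw_tp/sys_enter")
int prog_a(void *ctx)
{
	return sub_shared(1); /* never calls sub_b */
}

SEC("raw_tp/sys_exit")
int prog_b(void *ctx)
{
	return sub_shared(sub_b(2)); /* uses both subprograms */
}

char _license[] SEC("license") = "GPL";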

This patch set removes all such restrictions and adds complete support for
using BPF subprogram calls on the BPF side. This is achieved by libbpf
tracking subprograms individually, detecting which subprograms are used by
any given entry-point BPF program, and subsequently appending and relocating
code for just those used subprograms.
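
Conceptually, the used-subprogram detection boils down to a reachability walk
over bpf2bpf call instructions. The following is a minimal sketch of that
idea, not libbpf's actual implementation: subprog_of() and
subprog_insn_range() are hypothetical stand-ins for libbpf's internal
bookkeeping, and the real algorithm also has to account for relocations and
global functions.

#include <linux/bpf.h>
#include <stdbool.h>
#include <stddef.h>

/* Hypothetical helpers standing in for libbpf's internal bookkeeping. */
int subprog_of(size_t insn_idx);
void subprog_insn_range(int subprog, size_t *off, size_t *end);

/* Walk insns[off..end) and mark every subprogram transitively reachable
 * through BPF_PSEUDO_CALL instructions. Only the marked subprograms need
 * to be appended and relocated into the entry-point program's final image.
 */
static void mark_used_subprogs(const struct bpf_insn *insns,
			       size_t off, size_t end, bool *used)
{
	for (size_t i = off; i < end; i++) {
		const struct bpf_insn *insn = &insns[i];
		size_t t_off, t_end, target;
		int sp;

		if (insn->code != (BPF_JMP | BPF_CALL) ||
		    insn->src_reg != BPF_PSEUDO_CALL)
			continue; /* not a bpf2bpf subprogram call */

		target = i + 1 + insn->imm; /* imm is relative to next insn */
		sp = subprog_of(target);
		if (used[sp])
			continue;

		used[sp] = true;
		subprog_insn_range(sp, &t_off, &t_end);
		mark_used_subprogs(insns, t_off, t_end, used); /* recurse */
	}
}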

In addition, libbpf now also supports multiple entry-point BPF programs
within the same ELF section. This makes it possible to structure code so that
there are a few variants of BPF programs of the same type attaching to the
same target (e.g., for tracepoints and kprobes) without the need to worry
about ELF section name clashes.
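
One user-visible consequence is that a section name no longer uniquely
identifies a program, so lookups should use the program's C function name
instead. A short sketch (object file and program names are hypothetical):

#include <bpf/libbpf.h>
#include <stdio.h>

int open_and_pick(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;

	obj = bpf_object__open_file("my_progs.o", NULL);
	if (libbpf_get_error(obj))
		return -1;

	/* prog1 and prog3 can both live in SEC("raw_tp/sys_enter") now;
	 * the function name, not the section name, disambiguates them.
	 */
	prog = bpf_object__find_program_by_name(obj, "prog1");
	if (!prog) {
		bpf_object__close(obj);
		return -1;
	}
	printf("section: %s\n", bpf_program__section_name(prog));
	bpf_object__close(obj);
	return 0;
}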

This patch set opens the way for wider adoption of BPF subprogram calls,
especially for real-world production use cases with a complicated net of
subprograms. It will allow further scaling of the BPF verification process
through good use of global functions, which can be verified independently. It
is also an important prerequisite for static linking, which allows static BPF
libraries to not worry about naming clashes for section names, as well as to
use static non-inlined functions (subprograms) without the verifier rejecting
programs due to dead code.

The patch set is structured as follows:
- patches 1-6 contain all the libbpf changes necessary to support multi-prog
  sections and bpf2bpf subcalls;
- patch 7 adds dedicated selftests validating all combinations of possible
  sub-calls (within and across sections, static vs global functions);
- patch 8 deprecates bpf_program__title() in favor of
  bpf_program__section_name() (see the migration sketch right after this
  list). The intent was to also deprecate bpf_object__find_program_by_title()
  as it's now nonsensical with multiple programs per section. But there were
  too many selftest uses of it, and I didn't want to delay this patch set
  further and make it even bigger, so that is left for a follow-up cleanup;
- patches 9-10 remove uses of title-related APIs from bpftool and the
  bpf_program__title() use from selftests;
- patch 11 converts fexit_bpf2bpf to use explicit subtests (it contains 4
  tests that were not handled as subtests);
- patches 12-14 convert a few complicated BPF selftests to use __noinline
  functions to further validate the correctness of libbpf's bpf2bpf
  processing logic.
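
For reference, the mechanical migration that patches 8-10 perform looks like
this (a sketch; prog is any struct bpf_program pointer):

/* before: the second argument asked for a strdup()'ed copy of the name */
const char *sec_name = bpf_program__title(prog, false);

/* after: */
const char *sec_name = bpf_program__section_name(prog);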

v2->v3:
  - explained the subprog relocation algorithm in more detail (Alexei);
  - pyperf, strobelight and cls_redirect got new subprog variants, leaving
    other modes intact (Alexei);
v1->v2:
  - rename DEPRECATED to LIBBPF_DEPRECATED to avoid name clashes;
  - fix test_subprogs build;
  - convert a bunch of complicated selftests to __noinline (Alexei).
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
......@@ -1304,7 +1304,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
enum bpf_prog_type prog_type = common_prog_type;
if (prog_type == BPF_PROG_TYPE_UNSPEC) {
const char *sec_name = bpf_program__title(pos, false);
const char *sec_name = bpf_program__section_name(pos);
err = get_prog_type_by_name(sec_name, &prog_type,
&expected_attach_type);
......@@ -1398,7 +1398,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
err = bpf_obj_pin(bpf_program__fd(prog), pinfile);
if (err) {
p_err("failed to pin program %s",
bpf_program__title(prog, false));
bpf_program__section_name(prog));
goto err_close_obj;
}
} else {
......
......@@ -57,14 +57,16 @@ LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
__u32 *size);
LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf,
const struct btf_ext *btf_ext,
const char *sec_name, __u32 insns_cnt,
void **func_info, __u32 *cnt);
LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf,
const struct btf_ext *btf_ext,
const char *sec_name, __u32 insns_cnt,
void **line_info, __u32 *cnt);
LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
int btf_ext__reloc_func_info(const struct btf *btf,
const struct btf_ext *btf_ext,
const char *sec_name, __u32 insns_cnt,
void **func_info, __u32 *cnt);
LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
int btf_ext__reloc_line_info(const struct btf *btf,
const struct btf_ext *btf_ext,
const char *sec_name, __u32 insns_cnt,
void **line_info, __u32 *cnt);
LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
......
......@@ -198,8 +198,9 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
__u32 ifindex);
LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
bool needs_copy);
LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog);
LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead")
const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy);
LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
......
......@@ -302,6 +302,7 @@ LIBBPF_0.1.0 {
LIBBPF_0.2.0 {
global:
bpf_program__section_name;
perf_buffer__buffer_cnt;
perf_buffer__buffer_fd;
perf_buffer__epoll_fd;
......
......@@ -15,6 +15,8 @@
#define LIBBPF_API __attribute__((visibility("default")))
#endif
#define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg)))
/* Helper macro to declare and initialize libbpf options struct
*
* This dance with uninitialized declaration, followed by memset to zero,
......
......@@ -23,7 +23,13 @@ static inline int bpf_flow_load(struct bpf_object **obj,
if (ret)
return ret;
main_prog = bpf_object__find_program_by_title(*obj, section_name);
main_prog = NULL;
bpf_object__for_each_program(prog, *obj) {
if (strcmp(section_name, bpf_program__section_name(prog)) == 0) {
main_prog = prog;
break;
}
}
if (!main_prog)
return -1;
......
......@@ -49,6 +49,7 @@ void test_bpf_verif_scale(void)
{ "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS },
{ "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
{ "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
/* full unroll by llvm */
{ "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
......@@ -86,6 +87,9 @@ void test_bpf_verif_scale(void)
{ "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
{ "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
/* non-inlined subprogs */
{ "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
{ "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
{ "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
......
......@@ -12,10 +12,13 @@
#include "progs/test_cls_redirect.h"
#include "test_cls_redirect.skel.h"
#include "test_cls_redirect_subprogs.skel.h"
#define ENCAP_IP INADDR_LOOPBACK
#define ENCAP_PORT (1234)
static int duration = 0;
struct addr_port {
in_port_t port;
union {
......@@ -361,30 +364,18 @@ static void close_fds(int *fds, int n)
close(fds[i]);
}
void test_cls_redirect(void)
static void test_cls_redirect_common(struct bpf_program *prog)
{
struct test_cls_redirect *skel = NULL;
struct bpf_prog_test_run_attr tattr = {};
int families[] = { AF_INET, AF_INET6 };
struct sockaddr_storage ss;
struct sockaddr *addr;
socklen_t slen;
int i, j, err;
int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
skel = test_cls_redirect__open();
if (CHECK_FAIL(!skel))
return;
skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
if (CHECK_FAIL(test_cls_redirect__load(skel)))
goto cleanup;
addr = (struct sockaddr *)&ss;
for (i = 0; i < ARRAY_SIZE(families); i++) {
slen = prepare_addr(&ss, families[i]);
......@@ -402,7 +393,7 @@ void test_cls_redirect(void)
goto cleanup;
}
tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect);
tattr.prog_fd = bpf_program__fd(prog);
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct test_cfg *test = &tests[i];
......@@ -450,7 +441,58 @@ void test_cls_redirect(void)
}
cleanup:
test_cls_redirect__destroy(skel);
close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0]));
close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
}
static void test_cls_redirect_inlined(void)
{
struct test_cls_redirect *skel;
int err;
skel = test_cls_redirect__open();
if (CHECK(!skel, "skel_open", "failed\n"))
return;
skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
err = test_cls_redirect__load(skel);
if (CHECK(err, "skel_load", "failed: %d\n", err))
goto cleanup;
test_cls_redirect_common(skel->progs.cls_redirect);
cleanup:
test_cls_redirect__destroy(skel);
}
static void test_cls_redirect_subprogs(void)
{
struct test_cls_redirect_subprogs *skel;
int err;
skel = test_cls_redirect_subprogs__open();
if (CHECK(!skel, "skel_open", "failed\n"))
return;
skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
err = test_cls_redirect_subprogs__load(skel);
if (CHECK(err, "skel_load", "failed: %d\n", err))
goto cleanup;
test_cls_redirect_common(skel->progs.cls_redirect);
cleanup:
test_cls_redirect_subprogs__destroy(skel);
}
void test_cls_redirect(void)
{
if (test__start_subtest("cls_redirect_inlined"))
test_cls_redirect_inlined();
if (test__start_subtest("cls_redirect_subprogs"))
test_cls_redirect_subprogs();
}
......@@ -208,11 +208,18 @@ static void test_func_map_prog_compatibility(void)
void test_fexit_bpf2bpf(void)
{
test_target_no_callees();
test_target_yes_callees();
test_func_replace();
test_func_replace_verify();
test_func_sockmap_update();
test_func_replace_return_code();
test_func_map_prog_compatibility();
if (test__start_subtest("target_no_callees"))
test_target_no_callees();
if (test__start_subtest("target_yes_callees"))
test_target_yes_callees();
if (test__start_subtest("func_replace"))
test_func_replace();
if (test__start_subtest("func_replace_verify"))
test_func_replace_verify();
if (test__start_subtest("func_sockmap_update"))
test_func_sockmap_update();
if (test__start_subtest("func_replace_return_code"))
test_func_replace_return_code();
if (test__start_subtest("func_map_prog_compatibility"))
test_func_map_prog_compatibility();
}
......@@ -80,9 +80,8 @@ static void test_l4lb(const char *file)
void test_l4lb_all(void)
{
const char *file1 = "./test_l4lb.o";
const char *file2 = "./test_l4lb_noinline.o";
test_l4lb(file1);
test_l4lb(file2);
if (test__start_subtest("l4lb_inline"))
test_l4lb("test_l4lb.o");
if (test__start_subtest("l4lb_noinline"))
test_l4lb("test_l4lb_noinline.o");
}
......@@ -27,7 +27,7 @@ void test_reference_tracking(void)
const char *title;
/* Ignore .text sections */
title = bpf_program__title(prog, false);
title = bpf_program__section_name(prog);
if (strstr(title, ".text") != NULL)
continue;
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
#include <time.h>
#include "test_subprogs.skel.h"
static int duration;
void test_subprogs(void)
{
struct test_subprogs *skel;
int err;
skel = test_subprogs__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
return;
err = test_subprogs__attach(skel);
if (CHECK(err, "skel_attach", "failed to attach skeleton: %d\n", err))
goto cleanup;
usleep(1);
CHECK(skel->bss->res1 != 12, "res1", "got %d, exp %d\n", skel->bss->res1, 12);
CHECK(skel->bss->res2 != 17, "res2", "got %d, exp %d\n", skel->bss->res2, 17);
CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19);
CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36);
cleanup:
test_subprogs__destroy(skel);
}
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
#include "test_xdp_noinline.skel.h"
void test_xdp_noinline(void)
{
const char *file = "./test_xdp_noinline.o";
unsigned int nr_cpus = bpf_num_possible_cpus();
struct test_xdp_noinline *skel;
struct vip key = {.protocol = 6};
struct vip_meta {
__u32 flags;
......@@ -25,58 +26,42 @@ void test_xdp_noinline(void)
} real_def = {.dst = MAGIC_VAL};
__u32 ch_key = 11, real_num = 3;
__u32 duration, retval, size;
int err, i, prog_fd, map_fd;
int err, i;
__u64 bytes = 0, pkts = 0;
struct bpf_object *obj;
char buf[128];
u32 *magic = (u32 *)buf;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
skel = test_xdp_noinline__open_and_load();
if (CHECK(!skel, "skel_open_and_load", "failed\n"))
return;
map_fd = bpf_find_map(__func__, obj, "vip_map");
if (map_fd < 0)
goto out;
bpf_map_update_elem(map_fd, &key, &value, 0);
bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0);
bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0);
bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0);
map_fd = bpf_find_map(__func__, obj, "ch_rings");
if (map_fd < 0)
goto out;
bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
map_fd = bpf_find_map(__func__, obj, "reals");
if (map_fd < 0)
goto out;
bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4),
NUM_ITER, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
CHECK(err || retval != 1 || size != 54 ||
*magic != MAGIC_VAL, "ipv4",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6),
NUM_ITER, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
CHECK(err || retval != 1 || size != 74 ||
*magic != MAGIC_VAL, "ipv6",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
map_fd = bpf_find_map(__func__, obj, "stats");
if (map_fd < 0)
goto out;
bpf_map_lookup_elem(map_fd, &stats_key, stats);
bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats);
for (i = 0; i < nr_cpus; i++) {
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
pkts != NUM_ITER * 2)) {
printf("test_xdp_noinline:FAIL:stats %lld %lld\n",
bytes, pkts);
}
out:
bpf_object__close(obj);
CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2,
"stats", "bytes %lld pkts %lld\n",
(unsigned long long)bytes, (unsigned long long)pkts);
test_xdp_noinline__destroy(skel);
}
......@@ -67,7 +67,12 @@ typedef struct {
void* co_name; // PyCodeObject.co_name
} FrameData;
static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
#ifdef SUBPROGS
__noinline
#else
__always_inline
#endif
static void *get_thread_state(void *tls_base, PidData *pidData)
{
void* thread_state;
int key;
......@@ -155,7 +160,9 @@ struct {
} stackmap SEC(".maps");
#ifdef GLOBAL_FUNC
__attribute__((noinline))
__noinline
#elif defined(SUBPROGS)
static __noinline
#else
static __always_inline
#endif
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define STACK_MAX_LEN 50
#define SUBPROGS
#include "pyperf.h"
......@@ -266,8 +266,12 @@ struct tls_index {
uint64_t offset;
};
static __always_inline void *calc_location(struct strobe_value_loc *loc,
void *tls_base)
#ifdef SUBPROGS
__noinline
#else
__always_inline
#endif
static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
{
/*
* tls_mode value is:
......@@ -327,10 +331,15 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
: NULL;
}
static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
size_t idx, void *tls_base,
struct strobe_value_generic *value,
struct strobemeta_payload *data)
#ifdef SUBPROGS
__noinline
#else
__always_inline
#endif
static void read_int_var(struct strobemeta_cfg *cfg,
size_t idx, void *tls_base,
struct strobe_value_generic *value,
struct strobemeta_payload *data)
{
void *location = calc_location(&cfg->int_locs[idx], tls_base);
if (!location)
......@@ -440,8 +449,13 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
* read_strobe_meta returns NULL, if no metadata was read; otherwise returns
* pointer to *right after* payload ends
*/
static __always_inline void *read_strobe_meta(struct task_struct *task,
struct strobemeta_payload *data)
#ifdef SUBPROGS
__noinline
#else
__always_inline
#endif
static void *read_strobe_meta(struct task_struct *task,
struct strobemeta_payload *data)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
struct strobe_value_generic value = {0};
......
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
// Copyright (c) 2019 Facebook
#define STROBE_MAX_INTS 2
#define STROBE_MAX_STRS 25
#define STROBE_MAX_MAPS 13
#define STROBE_MAX_MAP_ENTRIES 20
#define NO_UNROLL
#define SUBPROGS
#include "strobemeta.h"
......@@ -22,6 +22,12 @@
#include "test_cls_redirect.h"
#ifdef SUBPROGS
#define INLINING __noinline
#else
#define INLINING __always_inline
#endif
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
......@@ -125,7 +131,7 @@ typedef struct buf {
uint8_t *const tail;
} buf_t;
static size_t buf_off(const buf_t *buf)
static __always_inline size_t buf_off(const buf_t *buf)
{
/* Clang seems to optimize constructs like
* a - b + c
......@@ -145,7 +151,7 @@ static size_t buf_off(const buf_t *buf)
return off;
}
static bool buf_copy(buf_t *buf, void *dst, size_t len)
static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len)
{
if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
return false;
......@@ -155,7 +161,7 @@ static bool buf_copy(buf_t *buf, void *dst, size_t len)
return true;
}
static bool buf_skip(buf_t *buf, const size_t len)
static __always_inline bool buf_skip(buf_t *buf, const size_t len)
{
/* Check whether off + len is valid in the non-linear part. */
if (buf_off(buf) + len > buf->skb->len) {
......@@ -173,7 +179,7 @@ static bool buf_skip(buf_t *buf, const size_t len)
* If scratch is not NULL, the function will attempt to load non-linear
* data via bpf_skb_load_bytes. On success, scratch is returned.
*/
static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch)
{
if (buf->head + len > buf->tail) {
if (scratch == NULL) {
......@@ -188,7 +194,7 @@ static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
return ptr;
}
static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
{
if (ipv4->ihl <= 5) {
return true;
......@@ -197,13 +203,13 @@ static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
return buf_skip(buf, (ipv4->ihl - 5) * 4);
}
static bool ipv4_is_fragment(const struct iphdr *ip)
static INLINING bool ipv4_is_fragment(const struct iphdr *ip)
{
uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
}
static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
{
struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
if (ipv4 == NULL) {
......@@ -222,7 +228,7 @@ static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
}
/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
{
if (!buf_copy(pkt, ports, sizeof(*ports))) {
return false;
......@@ -237,7 +243,7 @@ static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
return true;
}
static uint16_t pkt_checksum_fold(uint32_t csum)
static INLINING uint16_t pkt_checksum_fold(uint32_t csum)
{
/* The highest reasonable value for an IPv4 header
* checksum requires two folds, so we just do that always.
......@@ -247,7 +253,7 @@ static uint16_t pkt_checksum_fold(uint32_t csum)
return (uint16_t)~csum;
}
static void pkt_ipv4_checksum(struct iphdr *iph)
static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
{
iph->check = 0;
......@@ -268,10 +274,11 @@ static void pkt_ipv4_checksum(struct iphdr *iph)
iph->check = pkt_checksum_fold(acc);
}
static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
const struct ipv6hdr *ipv6,
uint8_t *upper_proto,
bool *is_fragment)
static INLINING
bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
const struct ipv6hdr *ipv6,
uint8_t *upper_proto,
bool *is_fragment)
{
/* We understand five extension headers.
* https://tools.ietf.org/html/rfc8200#section-4.1 states that all
......@@ -336,7 +343,7 @@ static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
* scratch is allocated on the stack. However, this usage should be safe since
it's the caller's stack after all.
*/
static inline __attribute__((__always_inline__)) struct ipv6hdr *
static __always_inline struct ipv6hdr *
pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
bool *is_fragment)
{
......@@ -354,20 +361,20 @@ pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
/* Global metrics, per CPU
*/
struct bpf_map_def metrics_map SEC("maps") = {
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
.key_size = sizeof(unsigned int),
.value_size = sizeof(metrics_t),
.max_entries = 1,
};
static metrics_t *get_global_metrics(void)
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
__type(key, unsigned int);
__type(value, metrics_t);
} metrics_map SEC(".maps");
static INLINING metrics_t *get_global_metrics(void)
{
uint64_t key = 0;
return bpf_map_lookup_elem(&metrics_map, &key);
}
static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
{
const int payload_off =
sizeof(*encap) +
......@@ -388,8 +395,8 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
}
static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
struct in_addr *next_hop, metrics_t *metrics)
static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
struct in_addr *next_hop, metrics_t *metrics)
{
metrics->forwarded_packets_total_gre++;
......@@ -509,8 +516,8 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
return bpf_redirect(skb->ifindex, 0);
}
static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
struct in_addr *next_hop, metrics_t *metrics)
static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
struct in_addr *next_hop, metrics_t *metrics)
{
/* swap L2 addresses */
/* This assumes that packets are received from a router.
......@@ -546,7 +553,7 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
return bpf_redirect(skb->ifindex, 0);
}
static ret_t skip_next_hops(buf_t *pkt, int n)
static INLINING ret_t skip_next_hops(buf_t *pkt, int n)
{
switch (n) {
case 1:
......@@ -566,8 +573,8 @@ static ret_t skip_next_hops(buf_t *pkt, int n)
* pkt is positioned just after the variable length GLB header
* iff the call is successful.
*/
static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
struct in_addr *next_hop)
static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
struct in_addr *next_hop)
{
if (encap->unigue.next_hop > encap->unigue.hop_count) {
return TC_ACT_SHOT;
......@@ -601,8 +608,8 @@ static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
* return value, and calling code works while still being "generic" to
* IPv4 and IPv6.
*/
static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
uint64_t iphlen, uint16_t sport, uint16_t dport)
static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
uint64_t iphlen, uint16_t sport, uint16_t dport)
{
switch (iphlen) {
case sizeof(struct iphdr): {
......@@ -630,9 +637,9 @@ static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
}
}
static verdict_t classify_tcp(struct __sk_buff *skb,
struct bpf_sock_tuple *tuple, uint64_t tuplen,
void *iph, struct tcphdr *tcp)
static INLINING verdict_t classify_tcp(struct __sk_buff *skb,
struct bpf_sock_tuple *tuple, uint64_t tuplen,
void *iph, struct tcphdr *tcp)
{
struct bpf_sock *sk =
bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
......@@ -663,8 +670,8 @@ static verdict_t classify_tcp(struct __sk_buff *skb,
return UNKNOWN;
}
static verdict_t classify_udp(struct __sk_buff *skb,
struct bpf_sock_tuple *tuple, uint64_t tuplen)
static INLINING verdict_t classify_udp(struct __sk_buff *skb,
struct bpf_sock_tuple *tuple, uint64_t tuplen)
{
struct bpf_sock *sk =
bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
......@@ -681,9 +688,9 @@ static verdict_t classify_udp(struct __sk_buff *skb,
return UNKNOWN;
}
static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
struct bpf_sock_tuple *tuple, uint64_t tuplen,
metrics_t *metrics)
static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
struct bpf_sock_tuple *tuple, uint64_t tuplen,
metrics_t *metrics)
{
switch (proto) {
case IPPROTO_TCP:
......@@ -698,7 +705,7 @@ static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
}
}
static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
{
struct icmphdr icmp;
if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
......@@ -745,7 +752,7 @@ static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
sizeof(tuple.ipv4), metrics);
}
static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
{
struct icmp6hdr icmp6;
if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
......@@ -797,8 +804,8 @@ static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
metrics);
}
static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
metrics_t *metrics)
static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
metrics_t *metrics)
{
metrics->l4_protocol_packets_total_tcp++;
......@@ -819,8 +826,8 @@ static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
}
static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
metrics_t *metrics)
static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
metrics_t *metrics)
{
metrics->l4_protocol_packets_total_udp++;
......@@ -837,7 +844,7 @@ static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
return classify_udp(pkt->skb, &tuple, tuplen);
}
static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
{
metrics->l3_protocol_packets_total_ipv4++;
......@@ -874,7 +881,7 @@ static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
}
}
static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
{
metrics->l3_protocol_packets_total_ipv6++;
......
#define SUBPROGS
#include "test_cls_redirect.c"
......@@ -17,9 +17,7 @@
#include "test_iptunnel_common.h"
#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
static __u32 rol32(__u32 word, unsigned int shift)
static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
......@@ -52,7 +50,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
typedef unsigned int u32;
static u32 jhash(const void *key, u32 length, u32 initval)
static __noinline u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
const unsigned char *k = key;
......@@ -88,7 +86,7 @@ static u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
{
a += initval;
b += initval;
......@@ -97,7 +95,7 @@ static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
return c;
}
static u32 jhash_2words(u32 a, u32 b, u32 initval)
static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
}
......@@ -200,8 +198,7 @@ struct {
__type(value, struct ctl_value);
} ctl_array SEC(".maps");
static __u32 get_packet_hash(struct packet_description *pckt,
bool ipv6)
static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6)
{
if (ipv6)
return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
......@@ -210,10 +207,10 @@ static __u32 get_packet_hash(struct packet_description *pckt,
return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
}
static bool get_packet_dst(struct real_definition **real,
struct packet_description *pckt,
struct vip_meta *vip_info,
bool is_ipv6)
static __noinline bool get_packet_dst(struct real_definition **real,
struct packet_description *pckt,
struct vip_meta *vip_info,
bool is_ipv6)
{
__u32 hash = get_packet_hash(pckt, is_ipv6);
__u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
......@@ -233,8 +230,8 @@ static bool get_packet_dst(struct real_definition **real,
return true;
}
static int parse_icmpv6(void *data, void *data_end, __u64 off,
struct packet_description *pckt)
static __noinline int parse_icmpv6(void *data, void *data_end, __u64 off,
struct packet_description *pckt)
{
struct icmp6hdr *icmp_hdr;
struct ipv6hdr *ip6h;
......@@ -255,8 +252,8 @@ static int parse_icmpv6(void *data, void *data_end, __u64 off,
return TC_ACT_UNSPEC;
}
static int parse_icmp(void *data, void *data_end, __u64 off,
struct packet_description *pckt)
static __noinline int parse_icmp(void *data, void *data_end, __u64 off,
struct packet_description *pckt)
{
struct icmphdr *icmp_hdr;
struct iphdr *iph;
......@@ -280,8 +277,8 @@ static int parse_icmp(void *data, void *data_end, __u64 off,
return TC_ACT_UNSPEC;
}
static bool parse_udp(void *data, __u64 off, void *data_end,
struct packet_description *pckt)
static __noinline bool parse_udp(void *data, __u64 off, void *data_end,
struct packet_description *pckt)
{
struct udphdr *udp;
udp = data + off;
......@@ -299,8 +296,8 @@ static bool parse_udp(void *data, __u64 off, void *data_end,
return true;
}
static bool parse_tcp(void *data, __u64 off, void *data_end,
struct packet_description *pckt)
static __noinline bool parse_tcp(void *data, __u64 off, void *data_end,
struct packet_description *pckt)
{
struct tcphdr *tcp;
......@@ -321,8 +318,8 @@ static bool parse_tcp(void *data, __u64 off, void *data_end,
return true;
}
static int process_packet(void *data, __u64 off, void *data_end,
bool is_ipv6, struct __sk_buff *skb)
static __noinline int process_packet(void *data, __u64 off, void *data_end,
bool is_ipv6, struct __sk_buff *skb)
{
void *pkt_start = (void *)(long)skb->data;
struct packet_description pckt = {};
......
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
const char LICENSE[] SEC("license") = "GPL";
__noinline int sub1(int x)
{
return x + 1;
}
static __noinline int sub5(int v);
__noinline int sub2(int y)
{
return sub5(y + 2);
}
static __noinline int sub3(int z)
{
return z + 3 + sub1(4);
}
static __noinline int sub4(int w)
{
return w + sub3(5) + sub1(6);
}
/* sub5() is an identity function, just to test weirder function layouts and
* call patterns
*/
static __noinline int sub5(int v)
{
return sub1(v) - 1; /* compensates sub1()'s + 1 */
}
/* unfortunately the verifier rejects `struct task_struct *t` as an unknown pointer
* type, so we need to accept pointer as integer and then cast it inside the
* function
*/
__noinline int get_task_tgid(uintptr_t t)
{
/* this ensures that CO-RE relocs work in multi-subprogs .text */
return BPF_CORE_READ((struct task_struct *)(void *)t, tgid);
}
int res1 = 0;
int res2 = 0;
int res3 = 0;
int res4 = 0;
SEC("raw_tp/sys_enter")
int prog1(void *ctx)
{
/* perform some CO-RE relocations to ensure they work with multi-prog
* sections correctly
*/
struct task_struct *t = (void *)bpf_get_current_task();
if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
return 1;
res1 = sub1(1) + sub3(2); /* (1 + 1) + (2 + 3 + (4 + 1)) = 12 */
return 0;
}
SEC("raw_tp/sys_exit")
int prog2(void *ctx)
{
struct task_struct *t = (void *)bpf_get_current_task();
if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
return 1;
res2 = sub2(3) + sub3(4); /* (3 + 2) + (4 + 3 + (4 + 1)) = 17 */
return 0;
}
/* prog3 has the same section name as prog1 */
SEC("raw_tp/sys_enter")
int prog3(void *ctx)
{
struct task_struct *t = (void *)bpf_get_current_task();
if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
return 1;
res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */
return 0;
}
/* prog4 has the same section name as prog2 */
SEC("raw_tp/sys_exit")
int prog4(void *ctx)
{
struct task_struct *t = (void *)bpf_get_current_task();
if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
return 1;
res4 = sub4(7) + sub1(8); /* (7 + (5 + 3 + (4 + 1)) + (6 + 1)) + (8 + 1) = 36 */
return 0;
}
......@@ -16,7 +16,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
static __u32 rol32(__u32 word, unsigned int shift)
static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
......@@ -49,7 +49,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
typedef unsigned int u32;
static __attribute__ ((noinline))
static __noinline
u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
......@@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
__attribute__ ((noinline))
__noinline
u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
{
a += initval;
......@@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
return c;
}
__attribute__ ((noinline))
__noinline
u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
......@@ -213,7 +213,7 @@ struct eth_hdr {
unsigned short eth_proto;
};
static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp)
{
__u64 off = sizeof(struct eth_hdr);
if (is_ipv6) {
......@@ -797,8 +797,8 @@ static int process_packet(void *data, __u64 off, void *data_end,
return XDP_DROP;
}
__attribute__ ((section("xdp-test"), used))
int balancer_ingress(struct xdp_md *ctx)
SEC("xdp-test-v4")
int balancer_ingress_v4(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
void *data_end = (void *)(long)ctx->data_end;
......@@ -812,11 +812,27 @@ int balancer_ingress(struct xdp_md *ctx)
eth_proto = bpf_ntohs(eth->eth_proto);
if (eth_proto == ETH_P_IP)
return process_packet(data, nh_off, data_end, 0, ctx);
else if (eth_proto == ETH_P_IPV6)
else
return XDP_DROP;
}
SEC("xdp-test-v6")
int balancer_ingress_v6(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
void *data_end = (void *)(long)ctx->data_end;
struct eth_hdr *eth = data;
__u32 eth_proto;
__u32 nh_off;
nh_off = sizeof(struct eth_hdr);
if (data + nh_off > data_end)
return XDP_DROP;
eth_proto = bpf_ntohs(eth->eth_proto);
if (eth_proto == ETH_P_IPV6)
return process_packet(data, nh_off, data_end, 1, ctx);
else
return XDP_DROP;
}
char _license[] __attribute__ ((section("license"), used)) = "GPL";
int _version __attribute__ ((section("version"), used)) = 1;
char _license[] SEC("license") = "GPL";
......@@ -151,7 +151,7 @@ static int run_test(int cgfd)
}
bpf_object__for_each_program(prog, pobj) {
prog_name = bpf_program__title(prog, /*needs_copy*/ false);
prog_name = bpf_program__section_name(prog);
if (libbpf_attach_type_by_name(prog_name, &attach_type))
goto err;
......