提交 5d35ccfc 编写于 作者: X Xu Kuohai 提交者: Zheng Zengkai

samples: bpf: Add sample BMC for Redis

Offering: HULK
hulk inclusion
category: feature
bugzilla: N/A

--------------------------------

BMC is an in-kernel key-value cache implemented in BPF and proposed by
paper [1]. The paper discussed BMC for memcached, obtaining at least
6x performance speedup.

This patch implements a sample BMC for Redis.

See [2] for details on how to build samples/bpf.

Output files:
 samples/bpf/bmctool
 samples/bpf/bmc/bpf.o

Sample usage:
 bmctool prog load -p 6379 ./bmc/bpf.o  # load bmc bpf prog and attach it
                                        # to sockets with listen port 6379

 bmctool stat                           # dump bmc status

 bmctool prog unload                    # detach and unload bmc prog

Tested with the following command:

 ./redis-benchmark -c 20 -r 1 -n 1000 -t get  -h 192.168.4.101 -d 102

Without BMC:
  throughput summary: 41666.67 requests per second
  latency summary (msec):
          avg       min       p50       p95       p99       max
        0.441     0.176     0.415     0.631     1.455     1.815

With BMC (100% HIT):
  throughput summary: 66666.67 requests per second
  latency summary (msec):
          avg       min       p50       p95       p99       max
        0.223     0.096     0.215     0.311     0.743     0.759

BMC Stat:
 Total GET Requests: 1000
 Hit GET Requests: 1000 (100.00%)
 Dropped GET Requests: 0 (0.00%)
 Total SET Requests: 1
 Hit SET Requests: 1 (100.00%)
 Dropped SET Requests: 0 (0.00%)

[1] https://www.usenix.org/conference/nsdi21/presentation/ghigoff
[2] https://www.kernel.org/doc/readme/samples-bpf-README.rst

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Yang Jihong <yangjihong@huawei.com>
Signed-off-by: He Fengqing <hefengqing@huawei.com> (original demo)
上级 7afd4a50
......@@ -57,6 +57,7 @@ tprogs-y += hbm
tprogs-y += sched_preempt
tprogs-y += sched_select_core
tprogs-y += sched_pick_task
# BMC sample loader; append so the programs listed above are kept
tprogs-y += bmctool
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
......@@ -117,6 +118,7 @@ hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS)
sched_preempt-objs := sched_preempt_user.o
sched_select_core-objs := sched_select_core_user.o
sched_pick_task-objs := sched_pick_task_user.o
bmctool-objs := bmc/tool.o
# Tell kbuild to always build the programs
always-y := $(tprogs-y)
......@@ -181,6 +183,7 @@ always-y += xdpsock_kern.o
always-y += sched_preempt_kern.o
always-y += sched_select_core_kern.o
always-y += sched_pick_task_kern.o
always-y += bmc/bpf.o
ifeq ($(ARCH), arm)
# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) Huawei Technologies Co., Ltd. 2022-2022. All rights reserved.
*
* Description: BPF program to accelerate Redis. The idea is to add a kernel
* cache for Redis data. When new Redis request is received, the kernel cache
* is checked, and if the requested data is found in the cache, a Redis reply
* message is constructed and sent back directly.
*/
#include <uapi/linux/in.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include "common.h"
#define BMC_MAX_REDIS_KEY_LEN 64
#define BMC_MAX_REDIS_VALUE_LEN 128
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
/*
 * Set of TCP destination ports handled by BMC. Keys and values are u32;
 * is_bmc_port() treats a port as enabled when its entry exists and is
 * non-zero. At most 16 ports can be registered.
 */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(u32));
__uint(max_entries, 16);
} bmc_ports SEC(".maps");
/*
 * Single-slot array (key 0) holding one u32. The BPF program in this file
 * never reads it; NOTE(review): presumably written by the userspace loader
 * to remember the attached interface - confirm against the tool.
 */
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(u32));
__uint(max_entries, 1);
} bmc_interface SEC(".maps");
/*
 * Cache key: the raw bulk-string bytes of a Redis key as captured by
 * extract_kvdata(), i.e. from the leading '$' through the trailing "\r\n".
 * The 16 extra bytes leave room for that length prefix and the terminators.
 */
struct redis_key {
/* number of valid bytes in data[] */
u32 len;
/* encoded in redis format */
u8 data[BMC_MAX_REDIS_KEY_LEN + 16];
};
/*
 * Cached value: the raw bulk-string bytes of a Redis value as captured by
 * extract_kvdata(), i.e. from the leading '$' through the trailing "\r\n",
 * so it can be copied into a GET reply unchanged.
 */
struct redis_value {
/* number of valid bytes in data[] */
u32 len;
/* encoded in redis format */
u8 data[BMC_MAX_REDIS_VALUE_LEN + 16];
};
/*
 * The in-kernel key/value cache: an LRU hash from struct redis_key to
 * struct redis_value, holding up to 10000 entries.
 */
struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__uint(key_size, sizeof(struct redis_key));
__uint(value_size, sizeof(struct redis_value));
__uint(max_entries, 10000);
} bmc_storage SEC(".maps");
/*
 * Scratch area used by bmc_main() while parsing one request. It lives in a
 * map (ctxmap) rather than in local variables.
 */
struct redis_ctx {
/* key parsed from the current request */
struct redis_key key;
/* value parsed from the current SET request */
struct redis_value value;
/* not referenced by the code visible in this file */
u32 offset;
};
/* Per-CPU single-slot array providing one struct redis_ctx scratch buffer. */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(struct redis_ctx));
__uint(max_entries, 1);
} ctxmap SEC(".maps");
/*
 * Per-CPU single-slot array of request counters (struct redis_bmc_stat);
 * bmc_main() increments the counters of the CPU it runs on.
 */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(struct redis_bmc_stat));
__uint(max_entries, 1);
} bmc_stats SEC(".maps");
/* Return the scratch redis_ctx stored in slot 0 of ctxmap, or NULL. */
static inline struct redis_ctx *get_ctx(void)
{
	u32 slot = 0;
	struct redis_ctx *scratch;

	scratch = bpf_map_lookup_elem(&ctxmap, &slot);
	return scratch;
}
/* Return the counters stored in slot 0 of bmc_stats, or NULL. */
static inline struct redis_bmc_stat *get_stat(void)
{
	u32 slot = 0;
	struct redis_bmc_stat *counters;

	counters = bpf_map_lookup_elem(&bmc_stats, &slot);
	return counters;
}
/*
 * Tell whether @port is registered in bmc_ports with a non-zero value.
 * The caller passes the port exactly as it appears in the TCP header.
 */
static bool is_bmc_port(u32 port)
{
	u32 *enabled = bpf_map_lookup_elem(&bmc_ports, &port);

	if (enabled == NULL)
		return 0;

	return *enabled != 0;
}
/*
 * Recompute the IPv4 header checksum in place.
 *
 * @ip: IPv4 header without options (sizeof(*ip) bytes are summed); its
 *      check field is zeroed, the 16-bit words are added up and the
 *      one's-complement of the folded sum is stored back into ip->check.
 */
static inline void compute_ip_checksum(struct iphdr *ip)
{
	u32 csum = 0;
	u16 *next_ip_u16 = (u16 *)ip;

	ip->check = 0;
#pragma clang loop unroll(full)
	for (int i = 0; i < (sizeof(*ip) >> 1); i++)
		csum += *next_ip_u16++;

	/*
	 * Fold twice: the first fold can itself carry into bit 16 (for
	 * example 0xffff + 8), and that carry must be added back before the
	 * result is truncated to 16 bits.
	 */
	csum = (csum & 0xffff) + (csum >> 16);
	csum += csum >> 16;
	ip->check = (0xffff & ~csum);
}
/*
 * Compute the TCP checksum over the IPv4 pseudo header followed by @len
 * bytes starting at @tcp, and store the result in tcp->check.
 *
 * @ip:       IPv4 header supplying saddr/daddr for the pseudo header
 * @tcp:      TCP header; tcp->check is zeroed before summing
 * @len:      number of bytes (TCP header + payload) to checksum
 * @data_end: end of the packet data; every read is bounded against it
 */
static inline void compute_tcp_checksum(struct iphdr *ip, struct tcphdr *tcp,
__u16 len, void *data_end)
{
/* pseudo header layout that is prepended to the segment for checksumming */
struct tcp_psedu_head {
__be32 saddr;
__be32 daddr;
__u8 zero;
__u8 proto;
__u16 tcplen;
};
struct tcp_psedu_head psedu;
char *tail = NULL;
/* holds a trailing odd byte padded with a zero byte */
char left_over[2] = {0};
psedu.saddr = ip->saddr;
psedu.daddr = ip->daddr;
psedu.zero = 0;
/* 6 is the IP protocol number of TCP */
psedu.proto = 6;
psedu.tcplen = bpf_htons(len);
tcp->check = 0;
u32 csum = 0;
u16 *next_u16 = (u16 *)&psedu;
unsigned int i;
/* sum the pseudo header */
#pragma clang loop unroll(full)
for (i = 0; i < (sizeof(struct tcp_psedu_head) >> 1); i++)
csum += *next_u16++;
next_u16 = (u16 *)tcp;
/*
 * Sum the segment 16 bits at a time. The constant 1024 caps the iteration
 * count independently of len (presumably to keep the loop bounded for the
 * BPF verifier).
 */
for (i = 0; i < 1024 && (i < len / 2); i++) {
if (next_u16 + 1 > data_end)
break;
csum += *next_u16++;
}
/* odd length: add the last byte, zero padded to 16 bits */
if (len % 2 == 1) {
tail = (char *)next_u16;
if (tail < data_end)
left_over[0] = *tail;
csum += *(unsigned short *)left_over;
}
csum = (csum >> 16) + (csum & 0xffff); /* add in accumulated carries */
csum += csum >> 16; /* add potential last carry */
tcp->check = (0xffff & ~csum);
}
/*
 * extract_kvdata(field, size, kv_data, kv_len)
 *
 * Parse one Redis bulk string ("$<len>\r\n<bytes>\r\n") located at the
 * caller's `payload` cursor and copy it, still in wire format, into
 * ctx->field.data; ctx->field.len receives the number of bytes copied.
 *
 * field:   member of *ctx to fill (key or value)
 * size:    largest accepted <len>
 * kv_data: set to the address of the leading '$'
 * kv_len:  set to the total encoded length (prefix + bytes + "\r\n")
 *
 * Uses the caller's locals payload, data, data_end, xdp and ctx. <len> may
 * have at most four decimal digits. On any parse error, or when the bytes
 * cannot be copied out of the packet, the macro returns XDP_PASS from the
 * enclosing function so the request reaches the real server untouched.
 */
#define extract_kvdata(field, size, kv_data, kv_len) \
do { \
	kv_data = payload; \
	kv_len = 0; \
	\
	if (payload + 1 > data_end || payload[0] != '$') \
		return XDP_PASS; \
	\
	payload++; \
	if (payload < data_end && payload[0] >= '0' && payload[0] <= '9') { \
		kv_len = kv_len * 10 + (payload[0] - '0'); \
		payload++; \
	} \
	\
	if (payload < data_end && payload[0] >= '0' && payload[0] <= '9') { \
		kv_len = kv_len * 10 + (payload[0] - '0'); \
		payload++; \
	} \
	\
	if (payload < data_end && payload[0] >= '0' && payload[0] <= '9') { \
		kv_len = kv_len * 10 + (payload[0] - '0'); \
		payload++; \
	} \
	\
	if (payload < data_end && payload[0] >= '0' && payload[0] <= '9') { \
		kv_len = kv_len * 10 + (payload[0] - '0'); \
		payload++; \
	} \
	\
	if (payload + 2 > data_end || payload[0] != '\r' || payload[1] != '\n') \
		return XDP_PASS; \
	\
	payload += 2; \
	\
	if (kv_len == 0 || kv_len > size) \
		return XDP_PASS; \
	\
	payload += kv_len + 2; \
	kv_len = payload - kv_data; \
	\
	if (kv_len > sizeof(ctx->field.data)) \
		return XDP_PASS; \
	\
	/* a failed copy (e.g. truncated packet) must not yield a zeroed entry */ \
	if (bpf_xdp_load_bytes(xdp, kv_data - data, ctx->field.data, kv_len)) \
		return XDP_PASS; \
	ctx->field.len = kv_len; \
} while (0)
/*
 * adjust_xdp_tail(size, len)
 *
 * Grow or shrink the packet so that it ends exactly min(size, len) bytes
 * after the TCP header. `size` is a compile-time bound for the loop that
 * advances the new end pointer.
 *
 * Uses the caller's locals thdr, payload, data_end, xdp, i and err; payload
 * is reset to the first byte after the TCP header. If the tail adjustment
 * fails, the macro returns XDP_PASS from the enclosing function.
 */
#define adjust_xdp_tail(size, len) \
do { \
	char *new_end; \
	\
	/* no adjustment is needed (and err is not assigned below) when the \
	 * packet already has the right size, so start from "no error" \
	 */ \
	err = 0; \
	new_end = payload = (char *)thdr + thdr->doff * 4; \
	for (i = 0; i < size && i < len; i++) \
		new_end++; \
	\
	if (new_end > data_end) \
		err = bpf_xdp_adjust_tail(xdp, new_end - data_end); \
	else if (new_end < data_end) \
		err = bpf_xdp_adjust_tail(xdp, -(data_end - new_end)); \
	\
	if (err) \
		return XDP_PASS; \
} while (0)
/*
 * sync_tcp_seq(len, ndrop)
 *
 * Report the sequence numbers consumed by a locally answered request via
 * bpf_update_tcp_seq(), using the 4-tuple of the incoming packet.
 *
 * len:   number of payload bytes in the reply that will be transmitted
 * ndrop: counter incremented when the update fails
 *
 * tuple.seq is the request's sequence number, tuple.delta its TCP payload
 * length and tuple.ack_seq the request's acknowledgment number advanced by
 * `len`, all in host byte order. Uses the caller's locals ihdr, thdr, ihlen,
 * thlen and xdp. On failure the macro returns XDP_DROP from the enclosing
 * function.
 *
 * NOTE(review): bpf_update_tcp_seq() and the seq/delta/ack_seq members of
 * struct bpf_sock_tuple are not defined in the visible code; presumably
 * they are provided by this kernel tree - confirm.
 */
#define sync_tcp_seq(len, ndrop) \
do { \
	struct bpf_sock_tuple tuple; \
	\
	tuple.ipv4.saddr = ihdr->saddr; \
	tuple.ipv4.daddr = ihdr->daddr; \
	tuple.ipv4.sport = thdr->source; \
	tuple.ipv4.dport = thdr->dest; \
	\
	tuple.seq = __bpf_ntohl(thdr->seq); \
	tuple.delta = __bpf_ntohs(ihdr->tot_len) - ihlen - thlen; \
	/* ack_seq is a 32-bit field: convert with ntohl, not ntohs */ \
	tuple.ack_seq = __bpf_ntohl(thdr->ack_seq) + len; \
	\
	if (bpf_update_tcp_seq(xdp, &tuple, sizeof(tuple.ipv4), -1, 0)) { \
		ndrop++; \
		return XDP_DROP; \
	} \
} while (0)
/*
 * build_reply_head(len)
 *
 * Turn the headers of the received request into the headers of a reply that
 * carries `len` payload bytes:
 *   - TCP: data offset forced to 5 words, ports swapped, the new sequence
 *     number is the request's ack number and the new ack number is the
 *     request's sequence number plus its payload length;
 *   - IP:  addresses swapped, tot_len recomputed for the new payload;
 *   - Ethernet: source and destination MAC addresses swapped.
 *
 * Statement order matters: the request payload length is derived from the
 * old ihdr->tot_len and the caller's thlen before tot_len is overwritten.
 * Checksums are not updated here. Uses the caller's locals ehdr, ihdr, thdr,
 * ihlen, thlen, port, seq, ipaddr and macaddr.
 */
#define build_reply_head(len) \
do { \
thdr->doff = 5; /* discard tcp options */ \
port = thdr->source; \
thdr->source = thdr->dest; \
thdr->dest = port; \
\
seq = __bpf_ntohl(thdr->seq); \
seq += __bpf_ntohs(ihdr->tot_len) - ihlen - thlen; \
thdr->seq = thdr->ack_seq; \
thdr->ack_seq = __bpf_ntohl(seq); \
\
ipaddr = ihdr->saddr; \
ihdr->saddr = ihdr->daddr; \
ihdr->daddr = ipaddr; \
ihdr->tot_len = __bpf_htons(ihlen + thdr->doff * 4 + len); \
\
memcpy(macaddr, ehdr->h_source, ETH_ALEN); \
memcpy(ehdr->h_source, ehdr->h_dest, ETH_ALEN); \
memcpy(ehdr->h_dest, macaddr, ETH_ALEN); \
} while (0)
/*
 * XDP entry point of the Redis cache.
 *
 * Only unfragmented IPv4/TCP packets without IP options, without
 * SYN/FIN/RST and addressed to a port registered in bmc_ports are
 * considered; everything else is passed up the stack (XDP_PASS).
 *
 * A packet whose payload starts with a two-element "get <key>" or a
 * three-element "set <key> <value>" request is handled here:
 *   - GET: on a cache hit the packet is rewritten into a reply carrying the
 *     cached value and bounced back with XDP_TX; on a miss it is passed on.
 *   - SET: the key/value pair is stored in bmc_storage and the packet is
 *     rewritten into a "+OK\r\n" reply and bounced back with XDP_TX.
 * If the TCP sequence update fails, the packet is dropped (XDP_DROP).
 *
 * Per-CPU counters in bmc_stats are updated along the way.
 */
SEC("bmc/main")
int bmc_main(struct xdp_md *xdp)
{
	/* adjust_xdp_tail() may test err without having called a helper */
	int err = 0;
	u32 vlen;
	unsigned int i;
	unsigned int seq;
	u8 macaddr[ETH_ALEN];
	__be32 ipaddr;
	__be16 port;
	char *data = (char *)(long)xdp->data;
	char *data_end = (char *)(long)xdp->data_end;
	struct ethhdr *ehdr = NULL;
	struct iphdr *ihdr = NULL;
	struct tcphdr *thdr = NULL;
	unsigned int ihlen;
	unsigned int thlen;
	char *payload;
	u32 offset;
	int is_get = 0;
	int expect_get = 0;
	struct redis_ctx *ctx;
	struct redis_bmc_stat *stat;
	char *key_data;
	char *value_data;
	u32 key_len;
	u32 value_len;

	ehdr = (struct ethhdr *)data;
	if (ehdr + 1 > data_end)
		return XDP_PASS;

	if (ehdr->h_proto != __bpf_constant_htons(ETH_P_IP))
		return XDP_PASS;

	ihdr = (struct iphdr *)(ehdr + 1);
	if (ihdr + 1 > data_end)
		return XDP_PASS;

	/* only plain 20-byte IPv4 headers carrying TCP */
	if (ihdr->ihl != 5 || ihdr->protocol != IPPROTO_TCP)
		return XDP_PASS;

	ihlen = ihdr->ihl * 4;

	/* leave fragments to the regular stack */
	if (ihdr->frag_off & __bpf_htons(IP_MF | IP_OFFSET))
		return XDP_PASS;

	if (__bpf_htons(ihdr->tot_len) > ETH_DATA_LEN)
		return XDP_PASS;

	thdr = (struct tcphdr *)(ihdr + 1);
	if (thdr + 1 > data_end)
		return XDP_PASS;

	if (thdr->syn || thdr->fin || thdr->rst)
		return XDP_PASS;

	if (!is_bmc_port(thdr->dest))
		return XDP_PASS;

	thlen = thdr->doff * 4;
	payload = (void *)thdr + thlen;

	/*
	 * SET message format:
	 * "*3\r\n"     // this is an array with 3 elements
	 * "$3\r\n"     // the first element is a string with 3 characters
	 * "set\r\n"    // the string is "set"
	 * "$5\r\n"     // the second element is a string with 5 characters
	 * "key01\r\n"  // the string is "key01"
	 * "$5\r\n"     // the third element is a string with 5 characters
	 * "val01\r\n"  // the string is "val01"
	 *
	 * GET message format:
	 * "*2\r\n"     // this is an array with 2 elements
	 * "$3\r\n"     // the first element is a string with 3 characters
	 * "get\r\n"    // the string is "get"
	 * "$5\r\n"     // the second element is a string with 5 characters
	 * "key01\r\n"  // the string is "key01"
	 */
	if (payload + 8 > data_end)
		return XDP_PASS;

	if (payload[0] != '*' || (payload[1] != '2' && payload[1] != '3') ||
	    payload[2] != '\r' || payload[3] != '\n' || payload[4] != '$' ||
	    payload[5] != '3' || payload[6] != '\r' || payload[7] != '\n')
		return XDP_PASS;

	expect_get = (payload[1] == '2');
	payload += 8;

	if (payload + 5 > data_end)
		return XDP_PASS;

	/* accept all-lowercase or all-uppercase command names */
	switch (payload[0]) {
	case 'g':
		is_get = 1;
		/* fallthrough */
	case 's':
		if (payload[1] != 'e' || payload[2] != 't' ||
		    payload[3] != '\r' || payload[4] != '\n')
			return XDP_PASS;
		break;
	case 'G':
		is_get = 1;
		/* fallthrough */
	case 'S':
		if (payload[1] != 'E' || payload[2] != 'T' ||
		    payload[3] != '\r' || payload[4] != '\n')
			return XDP_PASS;
		break;
	default:
		return XDP_PASS;
	}

	payload += 5;

	/* the element count must match the command (2 for get, 3 for set) */
	if (expect_get != is_get)
		return XDP_PASS;

	ctx = get_ctx();
	if (!ctx)
		return XDP_PASS;

	/* zero the scratch area so unused key bytes do not differ between lookups */
	memset(ctx, 0, sizeof(*ctx));

	stat = get_stat();
	if (!stat)
		return XDP_PASS;

	extract_kvdata(key, BMC_MAX_REDIS_KEY_LEN, key_data, key_len);

	if (is_get) {
		struct redis_value *val;

		stat->total_get_requests++;

		val = bpf_map_lookup_elem(&bmc_storage, &ctx->key);
		if (!val || !val->len || val->len > sizeof(val->data))
			return XDP_PASS;

		vlen = val->len;

		sync_tcp_seq(vlen, stat->drop_get_requests);

		build_reply_head(vlen);

		/*
		 * vlen counts the encoded value ("$<len>\r\n" + bytes + "\r\n")
		 * and may therefore exceed BMC_MAX_REDIS_VALUE_LEN; bound the
		 * resize by the size of the stored buffer so the packet is as
		 * long as the tot_len written above and the bytes stored below.
		 */
		adjust_xdp_tail(sizeof(val->data), vlen);

		/* packet pointers are invalid after the tail adjustment */
		data = (char *)(long)xdp->data;
		data_end = (char *)(long)xdp->data_end;
		ihdr = (struct iphdr *)(data + sizeof(struct ethhdr));
		thdr = (struct tcphdr *)(ihdr + 1);
		if (ihdr + 1 > data_end || thdr + 1 > data_end)
			return XDP_PASS;

		offset = sizeof(*ehdr) + ihdr->ihl * 4 + thdr->doff * 4;
		bpf_xdp_store_bytes(xdp, offset, val->data, vlen);

		compute_ip_checksum(ihdr);
		compute_tcp_checksum(ihdr, thdr, vlen + thdr->doff * 4,
				     data_end);

		stat->hit_get_requests++;

		return XDP_TX;
	} else {
		char reply[] = { '+', 'O', 'K', '\r', '\n'};

		stat->total_set_requests++;

		/*
		 * Step over the key with a bounded loop instead of plain
		 * pointer arithmetic so the verifier accepts the program.
		 */
		payload = key_data;
		for (i = 0; i < sizeof(ctx->key.data) && i < key_len; i++)
			payload++;

		extract_kvdata(value, BMC_MAX_REDIS_VALUE_LEN, value_data,
			       value_len);

		err = bpf_map_update_elem(&bmc_storage, &ctx->key,
					  &ctx->value, BPF_ANY);
		if (err)
			return XDP_PASS;

		sync_tcp_seq(sizeof(reply), stat->drop_set_requests);

		build_reply_head(sizeof(reply));

		adjust_xdp_tail(sizeof(reply), sizeof(reply));

		/* packet pointers are invalid after the tail adjustment */
		data = (char *)(long)xdp->data;
		data_end = (char *)(long)xdp->data_end;
		ihdr = (struct iphdr *)(data + sizeof(struct ethhdr));
		thdr = (struct tcphdr *)(ihdr + 1);
		if (ihdr + 1 > data_end || thdr + 1 > data_end)
			return XDP_PASS;

		offset = sizeof(*ehdr) + ihdr->ihl * 4 + thdr->doff * 4;
		bpf_xdp_store_bytes(xdp, offset, reply, sizeof(reply));

		compute_ip_checksum(ihdr);
		compute_tcp_checksum(ihdr, thdr, thdr->doff * 4 + sizeof(reply),
				     data_end);

		stat->hit_set_requests++;

		return XDP_TX;
	}

	return XDP_PASS;
}
/* License string emitted into the object's "license" section. */
char _license[] SEC("license") = "GPL";
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright (C) Huawei Technologies Co., Ltd. 2022-2022. All rights reserved.
* Description: common header for both user prog and bpf kernel prog
*/
#ifndef __REDIS_BMC_COMMON_H__
#define __REDIS_BMC_COMMON_H__
#define REDIS_GET_PROG_INDEX 0
#define REDIS_SET_PROG_INDEX 1
/*
 * Request counters kept by the BPF program. All members are 64-bit
 * unsigned counters.
 */
struct redis_bmc_stat {
/* GET requests parsed by the BPF program */
__u64 total_get_requests;
/* GET requests answered by the BPF program */
__u64 hit_get_requests;
/* GET requests dropped by the BPF program */
__u64 drop_get_requests;
/* SET requests parsed by the BPF program */
__u64 total_set_requests;
/* SET requests answered by the BPF program */
__u64 hit_set_requests;
/* SET requests dropped by the BPF program */
__u64 drop_set_requests;
};
#endif
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) Huawei Technologies Co., Ltd. 2022-2022. All rights reserved.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#include <linux/if_link.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/select.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "common.h"
#define DEFAULT_CGROUP_PATH "/sys/fs/cgroup"
#define DEFAULT_REDIS_PORT 6379
#ifndef ARRAY_SIZE
/*
 * Number of elements in a true array (not a pointer). The argument is
 * parenthesized so that expression arguments are indexed as a whole.
 */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif
#define IFINDEX_NUM 8
/* Global state of the tool, filled in while handling "prog load". */
struct {
/* cgroup directory opened by setup_bpf() */
char *cgroup_path;
/* path of the BPF object file to load */
char *bpf_path;
/* fd of cgroup_path */
int cgroup_fd;
/* fds of the maps found in the loaded object */
int map_ports_fd;
int map_storage_fd;
int map_interface_fd;
int map_stats_fd;
/* fd of the loaded "bmc/main" XDP program */
int redis_xdp_main_prog_fd;
/* port, in host byte order, to register in the ports map */
uint16_t listen_port;
/* index of the interface the XDP program is attached to */
unsigned int ifindex;
} bmc;
/* Describes one BPF program of the object and how to load/attach/pin it. */
struct bmc_prog_info {
/* ELF section name used to look the program up */
const char *sec_name;
enum bpf_prog_type prog_type;
/* expected attach type; only applied to non-XDP programs */
enum bpf_attach_type attach_type;
/* where the program fd is stored once found */
int *p_prog_fd;
/* fd to attach a non-XDP program to; may be NULL */
int *p_attach_fd;
unsigned int attach_flags;
/* non-zero for the XDP program attached to bmc.ifindex */
unsigned int is_xdp_main;
/* bpffs path the program is pinned at */
const char *pin_path;
/* libbpf handle, set by find_progs() */
struct bpf_program *prog;
};
/* Describes one BPF map of the object and where to pin it. */
struct bmc_map_info {
/* map name used to look the map up in the object */
const char *map_name;
/* where the map fd is stored once found */
int *p_map_fd;
/* bpffs path the map is pinned at */
char *pin_path;
/* libbpf handle, set by find_maps() */
struct bpf_map *map;
/* true for the map read by "stat" */
bool is_stat_map;
/* true for the map that records the attached ifindex */
bool is_interface_map;
};
/* Programs expected in the BPF object: currently only the XDP entry point. */
static struct bmc_prog_info prog_infos[] = {
{
.sec_name = "bmc/main",
.prog_type = BPF_PROG_TYPE_XDP,
.p_prog_fd = &bmc.redis_xdp_main_prog_fd,
.attach_flags = XDP_FLAGS_DRV_MODE, // XDP_FLAGS_SKB_MODE
.is_xdp_main = 1,
.pin_path = "/sys/fs/bpf/bmc/prog_xdp_main"
}
};
/*
 * Maps expected in the BPF object, with the bpffs path each one is pinned
 * at so that later "stat" and "prog unload" invocations can reopen them.
 */
static struct bmc_map_info map_infos[] = {
{
.map_name = "bmc_ports",
.p_map_fd = &bmc.map_ports_fd,
.pin_path = "/sys/fs/bpf/bmc/map_ports"
},
{
.map_name = "bmc_storage",
.p_map_fd = &bmc.map_storage_fd,
.pin_path = "/sys/fs/bpf/bmc/map_storage"
},
{
.map_name = "bmc_interface",
.p_map_fd = &bmc.map_interface_fd,
.pin_path = "/sys/fs/bpf/bmc/interface",
.is_interface_map = true,
},
{
.map_name = "bmc_stats",
.p_map_fd = &bmc.map_stats_fd,
.pin_path = "/sys/fs/bpf/bmc/stats",
.is_stat_map = true,
},
};
static int find_type_by_sec_name(const char *sec_name,
enum bpf_prog_type *p_prog_type,
enum bpf_attach_type *p_attach_type)
{
int i;
if (sec_name == NULL) {
fprintf(stderr, "sec_name is NULL\n");
return -1;
}
for (i = 0; i < ARRAY_SIZE(prog_infos); i++) {
if (!strcmp(prog_infos[i].sec_name, sec_name)) {
*p_prog_type = prog_infos[i].prog_type;
*p_attach_type = prog_infos[i].attach_type;
return 0;
}
}
fprintf(stderr, "unknown prog %s\n", sec_name);
return -1;
}
/*
 * Assign a program type (and, for non-XDP programs, the expected attach
 * type) to every program in obj, based on its section name.
 *
 * Returns 0 on success, -1 if a section name is not listed in prog_infos.
 */
static int set_prog_type(struct bpf_object *obj)
{
	struct bpf_program *cur;

	bpf_object__for_each_program(cur, obj) {
		enum bpf_prog_type ptype;
		enum bpf_attach_type atype;
		const char *section = bpf_program__section_name(cur);

		if (find_type_by_sec_name(section, &ptype, &atype) != 0)
			return -1;

		bpf_program__set_type(cur, ptype);
		if (ptype == BPF_PROG_TYPE_XDP)
			continue;

		bpf_program__set_expected_attach_type(cur, atype);
	}

	return 0;
}
/*
 * Open the BPF object file, set the type of each program and load the
 * object into the kernel.
 *
 * Returns the loaded object, or NULL after printing a message; the object
 * is closed again on every failure after a successful open.
 */
static struct bpf_object *load_bpf_file(const char *bpf_file)
{
	char reason[256];
	struct bpf_object *bpf_obj = bpf_object__open(bpf_file);
	int rc = libbpf_get_error(bpf_obj);

	if (rc) {
		libbpf_strerror(rc, reason, sizeof(reason));
		fprintf(stderr, "unable to open bpf file %s : %s\n", bpf_file,
			reason);
		return NULL;
	}

	if (set_prog_type(bpf_obj) != 0)
		goto close_obj;

	rc = bpf_object__load(bpf_obj);
	if (rc) {
		fprintf(stderr, "load bpf object failed\n");
		goto close_obj;
	}

	return bpf_obj;

close_obj:
	bpf_object__close(bpf_obj);
	return NULL;
}
/*
 * Find the program stored in section sec_name of obj.
 *
 * On success returns 0 and stores the libbpf handle in *p_prog and its fd
 * in *p_prog_fd; otherwise prints a message and returns -1 without touching
 * the outputs.
 */
static int find_prog(struct bpf_object *obj, const char *sec_name,
		     struct bpf_program **p_prog, int *p_prog_fd)
{
	struct bpf_program *found;
	int found_fd;

	found = bpf_object__find_program_by_title(obj, sec_name);
	if (found == NULL) {
		fprintf(stderr, "failed to find prog %s\n", sec_name);
		return -1;
	}

	found_fd = bpf_program__fd(found);
	if (found_fd < 0) {
		fprintf(stderr, "failed to get fd of prog %s\n", sec_name);
		return -1;
	}

	*p_prog = found;
	*p_prog_fd = found_fd;
	return 0;
}
static void unpin_progs(int n)
{
int i;
for (i = 0; i < n; i++)
bpf_program__unpin(prog_infos[i].prog, prog_infos[i].pin_path);
}
static int find_progs(struct bpf_object *obj)
{
int i;
struct bmc_prog_info *info;
for (i = 0; i < ARRAY_SIZE(prog_infos); i++) {
info = &prog_infos[i];
if (find_prog(obj, info->sec_name, &info->prog, info->p_prog_fd))
goto error_find_prog;
if (bpf_program__pin(info->prog, info->pin_path))
goto error_find_prog;
}
return 0;
error_find_prog:
unpin_progs(i);
return -1;
}
/*
 * Find the map called map_name in obj.
 *
 * On success returns 0 and stores the libbpf handle in *p_map and its fd in
 * *p_map_fd; otherwise prints a message and returns -1 without touching the
 * outputs.
 */
static int find_map(struct bpf_object *obj, const char *map_name,
		    struct bpf_map **p_map, int *p_map_fd)
{
	struct bpf_map *found;
	int found_fd;

	found = bpf_object__find_map_by_name(obj, map_name);
	if (found == NULL) {
		fprintf(stderr, "failed to find map %s\n", map_name);
		return -1;
	}

	found_fd = bpf_map__fd(found);
	if (found_fd < 0) {
		fprintf(stderr, "failed to get fd of map %s\n", map_name);
		return -1;
	}

	*p_map = found;
	*p_map_fd = found_fd;
	return 0;
}
static void unpin_maps(int n)
{
int i;
for (i = 0; i < n; i++)
bpf_map__unpin(map_infos[i].map, map_infos[i].pin_path);
}
/*
 * Locate every map listed in map_infos inside obj, pin it, and record
 * bmc.ifindex at key 0 of the interface map.
 *
 * The stored ifindex is what "prog unload" later reads back to detach the
 * XDP program, so failing to store it is treated as an error.
 *
 * Returns 0 on success. On failure every map pinned so far (including the
 * current one, if it was already pinned) is unpinned and -1 is returned.
 */
static int find_maps(struct bpf_object *obj)
{
	int i;
	int pinned = 0;
	__u32 key;
	__u32 value;
	int fd;
	struct bmc_map_info *info;

	for (i = 0; i < ARRAY_SIZE(map_infos); i++) {
		info = &map_infos[i];

		if (find_map(obj, info->map_name, &info->map, info->p_map_fd))
			goto error_find_map;

		if (bpf_map__pin(info->map, info->pin_path)) {
			fprintf(stderr, "failed to pin map %s to path %s\n",
				info->map_name, info->pin_path);
			goto error_find_map;
		}
		pinned = i + 1;

		if (info->is_interface_map) {
			key = 0;
			value = bmc.ifindex;
			fd = bpf_map__fd(info->map);
			if (bpf_map_update_elem(fd, &key, &value, 0)) {
				fprintf(stderr,
					"failed to store ifindex %u in map %s\n",
					value, info->map_name);
				goto error_find_map;
			}
		}
	}

	return 0;

error_find_map:
	unpin_maps(pinned);
	return -1;
}
/* Remove the XDP program from ifindex by installing fd -1 with flags. */
static void detach_xdp_progs(unsigned int ifindex, __u32 flags)
{
	const int no_prog_fd = -1;

	bpf_set_link_xdp_fd(ifindex, no_prog_fd, flags);
}
/*
 * Detach the first n programs of prog_infos: the main XDP program is
 * removed from bmc.ifindex, any non-XDP program is detached by attach type.
 */
static void detach_progs(int n)
{
	int idx = 0;

	while (idx < n) {
		struct bmc_prog_info *pi = &prog_infos[idx++];

		if (pi->is_xdp_main) {
			detach_xdp_progs(bmc.ifindex, pi->attach_flags);
			continue;
		}

		if (pi->prog_type != BPF_PROG_TYPE_XDP)
			bpf_prog_detach(*pi->p_prog_fd, pi->attach_type);
	}
}
/*
 * Attach prog_fd as the XDP program of bmc.ifindex. Nothing is done when no
 * interface index is set. Returns 0 on success or when skipped, -1 on error.
 */
static int attach_xdp_prog(int prog_fd, __u32 flags)
{
	if (!bmc.ifindex)
		return 0;

	if (bpf_set_link_xdp_fd(bmc.ifindex, prog_fd, flags) == 0)
		return 0;

	fprintf(stderr, "failed to attach xdp prog\n");
	return -1;
}
static int attach_progs(struct bpf_object *obj)
{
int i;
int err;
int prog_fd;
int attach_fd;
unsigned int flags;
enum bpf_attach_type type;
struct bmc_prog_info *info;
for (i = 0; i < ARRAY_SIZE(prog_infos); i++) {
info = &prog_infos[i];
prog_fd = *info->p_prog_fd;
flags = info->attach_flags;
if (info->is_xdp_main)
err = attach_xdp_prog(prog_fd, flags);
else if (info->prog_type != BPF_PROG_TYPE_XDP &&
info->p_attach_fd != NULL) {
attach_fd = *info->p_attach_fd;
type = info->attach_type;
err = bpf_prog_attach(prog_fd, attach_fd, type, flags);
} else
continue;
if (err) {
fprintf(stderr, "attach prog %s failed!\n",
info->sec_name);
goto error_attach_prog;
}
}
return 0;
error_attach_prog:
detach_progs(i);
return -1;
}
/*
 * Register bmc.listen_port in the ports map with value 1.
 *
 * The key is the port in network byte order widened to 32 bits, which is
 * how the BPF program looks it up (it passes the TCP header field as is).
 *
 * Returns the result of bpf_map_update_elem(): 0 on success.
 */
static int add_bmc_port(void)
{
	int ret;
	int map_fd = bmc.map_ports_fd;
	uint16_t port = htons(bmc.listen_port);
	uint32_t key = (uint32_t)port;
	uint32_t value = 1;

	ret = bpf_map_update_elem(map_fd, &key, &value, 0);
	if (ret)
		/* report the port as the user typed it, not byte-swapped */
		fprintf(stderr, "failed to add port %u\n", bmc.listen_port);

	return ret;
}
/*
 * Load the BPF object and bring BMC up: open the cgroup directory, load the
 * object, pin programs and maps, attach the programs and register the
 * listen port.
 *
 * Returns 0 on success. On failure everything done so far is undone in
 * reverse order and -1 is returned. On success the object and the cgroup fd
 * are intentionally left open.
 */
static int setup_bpf(void)
{
	struct bpf_object *obj;

	/* O_RDONLY belongs in the flags argument, not in the mode argument */
	bmc.cgroup_fd = open(bmc.cgroup_path, O_RDONLY | O_DIRECTORY);
	if (bmc.cgroup_fd < 0) {
		fprintf(stderr, "failed to open cgroup %s: %s\n",
			bmc.cgroup_path, strerror(errno));
		return -1;
	}

	obj = load_bpf_file(bmc.bpf_path);
	if (!obj)
		goto error_close_cgroup;

	if (find_progs(obj))
		goto error_load_object;

	if (find_maps(obj))
		goto error_find_maps;

	if (attach_progs(obj))
		goto error_attach_progs;

	if (add_bmc_port())
		goto error_add_port;

	return 0;

error_add_port:
	detach_progs(ARRAY_SIZE(prog_infos));
error_attach_progs:
	unpin_maps(ARRAY_SIZE(map_infos));
error_find_maps:
	unpin_progs(ARRAY_SIZE(prog_infos));
error_load_object:
	bpf_object__close(obj);
error_close_cgroup:
	close(bmc.cgroup_fd);
	return -1;
}
/*
 * Parse the arguments of "prog load" into the global bmc state.
 *
 *   -c CGROUP_PATH  cgroup directory (default DEFAULT_CGROUP_PATH)
 *   -p LISTEN_PORT  port, 1..65535 (default DEFAULT_REDIS_PORT)
 *   -i INTERFACE    network interface name (required)
 *   BPF_FILE        path of the BPF object (required, first non-option)
 *
 * argv[0] is skipped by getopt(), so the caller passes the vector starting
 * one element before the first option.
 *
 * Returns 0 on success, -1 after printing a message on invalid input.
 */
static int parse_load_args(int argc, char *argv[])
{
	int opt;
	long port;
	char *end;
	const char *ifname = NULL;

	bmc.cgroup_path = DEFAULT_CGROUP_PATH;
	bmc.listen_port = DEFAULT_REDIS_PORT;
	bmc.ifindex = 0;

	while ((opt = getopt(argc, argv, "c:p:i:")) != -1) {
		switch (opt) {
		case 'c':
			bmc.cgroup_path = optarg;
			break;
		case 'p':
			/* strtol lets us reject empty input and trailing junk */
			errno = 0;
			port = strtol(optarg, &end, 10);
			if (errno || end == optarg || *end != '\0' ||
			    port <= 0 || port > USHRT_MAX) {
				fprintf(stderr, "invalid port: %s\n", optarg);
				return -1;
			}
			bmc.listen_port = (uint16_t)port;
			break;
		case 'i':
			printf("interface: %s\n", optarg);
			ifname = optarg;
			/* yields 0 for an unknown name; rejected below */
			bmc.ifindex = if_nametoindex(ifname);
			break;
		default:
			fprintf(stderr, "unknown option %c\n", opt);
			return -1;
		}
	}

	if (!bmc.ifindex) {
		fprintf(stderr, "no network interface found\n");
		return -1;
	}

	if (optind >= argc) {
		fprintf(stderr, "no bpf prog file found\n");
		return -1;
	}

	bmc.bpf_path = argv[optind];

	printf("bpf file: %s\n", bmc.bpf_path);
	printf("cgroup path: %s\n", bmc.cgroup_path);
	printf("listen port: %d\n", bmc.listen_port);
	printf("interface: %s\n", ifname);

	return 0;
}
/* One entry of a sub-command table used by dispatch_cmd(). */
struct cmd {
/* word typed on the command line */
const char *name;
/* handler; receives the arguments that follow the command word */
int (*func)(int argc, char *argv[]);
};
/* Handlers referenced by the command tables below. */
static int do_prog(int argc, char *argv[]);
static int do_stat(int argc, char *argv[]);
static int do_prog_load(int argc, char *argv[]);
static int do_prog_unload(int argc, char *argv[]);
/* Top-level objects: "<tool> prog ..." and "<tool> stat". */
static struct cmd main_cmds[] = {
{ "prog", do_prog },
{ "stat", do_stat },
};
/* Sub-commands of "prog". */
static struct cmd prog_cmds[] = {
{ "load", do_prog_load },
{ "unload", do_prog_unload },
};
/* argv[0] of the process, used in the usage messages. */
static char *elf_name;
/*
 * Run the handler in cmds[] whose name equals argv[0], passing it the
 * remaining arguments.
 *
 * Returns the handler's result, except that a handler result of -2 prints
 * the help text and is turned into -1. When there is no argument or no
 * command matches, the help text is printed and -1 is returned.
 */
static int dispatch_cmd(struct cmd cmds[], int ncmd, int argc,
			char *argv[], void (*help)(void))
{
	int idx;
	int rc;

	if (argc > 0) {
		for (idx = 0; idx < ncmd; idx++) {
			if (strcmp(argv[0], cmds[idx].name) != 0)
				continue;

			rc = cmds[idx].func(argc - 1, argv + 1);
			if (rc != -2)
				return rc;

			help();
			return -1;
		}
	}

	help();
	return -1;
}
/*
 * "prog load": parse the options, then load and attach the BPF object.
 *
 * The argument vector is widened by one element to the left so that
 * getopt() sees the command word as argv[0]. Returns -2 on a usage error
 * (the dispatcher then prints the help), -1 on a setup failure, 0 on
 * success.
 */
static int do_prog_load(int argc, char *argv[])
{
	int parsed = parse_load_args(argc + 1, argv - 1);

	if (parsed < 0)
		return -2;

	return setup_bpf() ? -1 : 0;
}
/*
 * "prog unload [CGROUP_PATH]": detach the programs and remove all pins.
 *
 * The interface the XDP program was attached to is read back from the
 * pinned interface map (key 0). Programs whose attach type is
 * BPF_CGROUP_SOCK_OPS are detached from the cgroup, the main XDP program is
 * detached from the interface, and every program and map pin is unlinked.
 *
 * argv[0], when present, overrides the default cgroup path.
 *
 * Returns 0 on success, -1 after printing a message on failure.
 */
static int do_prog_unload(int argc, char *argv[])
{
	int i;
	int err;
	int ret = -1;
	int prog_fd;
	int cgroup_fd;
	int map_fd;
	char *interface_map_path = NULL;
	char *cgroup_path = DEFAULT_CGROUP_PATH;
	__u32 ifindex;
	__u32 key;

	/* the command word is already stripped: one argument means argc == 1 */
	if (argc > 0)
		cgroup_path = argv[0];

	cgroup_fd = open(cgroup_path, O_RDONLY | O_DIRECTORY);
	if (cgroup_fd < 0) {
		fprintf(stderr, "failed to open cgroup path: %s\n",
			cgroup_path);
		return -1;
	}

	for (i = 0; i < ARRAY_SIZE(map_infos); i++) {
		if (map_infos[i].is_interface_map) {
			interface_map_path = map_infos[i].pin_path;
			break;
		}
	}

	if (!interface_map_path) {
		fprintf(stderr, "no interface map found\n");
		goto out;
	}

	map_fd = bpf_obj_get(interface_map_path);
	if (map_fd < 0) {
		fprintf(stderr, "failed to get map from %s\n",
			interface_map_path);
		goto out;
	}

	key = 0;
	err = bpf_map_lookup_elem(map_fd, &key, &ifindex);
	close(map_fd);
	if (err) {
		fprintf(stderr, "lookup interface failed\n");
		goto out;
	}

	for (i = 0; i < ARRAY_SIZE(prog_infos); i++) {
		if (prog_infos[i].attach_type == BPF_CGROUP_SOCK_OPS) {
			prog_fd = bpf_obj_get(prog_infos[i].pin_path);
			if (prog_fd >= 0) {
				bpf_prog_detach2(prog_fd, cgroup_fd,
						 BPF_CGROUP_SOCK_OPS);
				close(prog_fd);
			}
		}

		if (prog_infos[i].is_xdp_main)
			detach_xdp_progs(ifindex, prog_infos[i].attach_flags);

		unlink(prog_infos[i].pin_path);
	}

	for (i = 0; i < ARRAY_SIZE(map_infos); i++)
		unlink(map_infos[i].pin_path);

	ret = 0;

out:
	close(cgroup_fd);
	return ret;
}
/* Print the usage of the "prog" sub-commands to stderr. */
static void do_prog_help(void)
{
	static const char usage[] =
		"Usage: %s prog load [-c CGROUP_PATH] [-p LISTEN_PORT]"
		" {-i INTERFACE} {BPF_FILE}\n"
		" %s prog unload [CGROUP_PATH]\n";

	fprintf(stderr, usage, elf_name, elf_name);
}
/* "prog": hand the remaining arguments to the matching prog sub-command. */
static int do_prog(int argc, char *argv[])
{
	int ncmd = ARRAY_SIZE(prog_cmds);

	return dispatch_cmd(prog_cmds, ncmd, argc, argv, do_prog_help);
}
static int do_stat(int argc, char *argv[])
{
int i;
int fd;
int err;
int ncpu;
bool found = false;
struct bmc_map_info *info;
struct bpf_map_info map = {};
struct redis_bmc_stat *percpu_stat;
struct redis_bmc_stat stat = {};
__u32 len = sizeof(map);
__u32 key;
ncpu = sysconf(_SC_NPROCESSORS_ONLN);
if (ncpu < 0) {
fprintf(stderr, "sysconf failed: %s\n", strerror(errno));
return -1;
}
percpu_stat = malloc(sizeof(struct redis_bmc_stat) * ncpu);
if (!percpu_stat) {
fprintf(stderr, "malloc percpu stat failed\n");
return -1;
}
for (i = 0; i < ARRAY_SIZE(map_infos); i++) {
info = &map_infos[i];
if (info->is_stat_map) {
found = true;
break;
}
}
if (!found) {
fprintf(stderr, "no stats map found\n");
free(percpu_stat);
return -1;
}
fd = bpf_obj_get(info->pin_path);
if (fd < 0) {
fprintf(stderr, "failed to open %s\n",
info->pin_path);
free(percpu_stat);
return -1;
}
err = bpf_obj_get_info_by_fd(fd, &map, &len);
if (err) {
fprintf(stderr, "failed to get map info\n");
err = -1;
goto out;
}
if (map.type != BPF_MAP_TYPE_PERCPU_ARRAY) {
fprintf(stderr, "unexpected map type: %d\n", map.type);
err = -1;
goto out;
}
if (map.key_size != sizeof(__u32)) {
fprintf(stderr, "unexpected map key_size: %u\n", map.key_size);
err = -1;
goto out;
}
if (map.value_size != sizeof(struct redis_bmc_stat)) {
fprintf(stderr, "unexpected map key_size: %u\n", map.key_size);
err = -1;
goto out;
}
key = 0;
err = bpf_map_lookup_elem(fd, &key, percpu_stat);
if (err) {
fprintf(stderr, "lookup cpu stat failed, cpu=%u\n", i);
err = -1;
goto out;
}
for (int i = 0; i < ncpu; i++) {
stat.total_get_requests += percpu_stat[i].total_get_requests;
stat.hit_get_requests += percpu_stat[i].hit_get_requests;
stat.drop_get_requests += percpu_stat[i].drop_get_requests;
stat.total_set_requests += percpu_stat[i].total_set_requests;
stat.hit_set_requests += percpu_stat[i].hit_set_requests;
stat.drop_set_requests += percpu_stat[i].drop_set_requests;
}
printf("Total GET Requests: %llu\n", stat.total_get_requests);
printf("Hit GET Requests: %llu (%.2f%%)\n", stat.hit_get_requests,
stat.total_get_requests == 0 ? 0 :
(double)stat.hit_get_requests /
(double)stat.total_get_requests *
100);
printf("Dropped GET Requests: %llu (%.2lf%%)\n", stat.drop_get_requests,
stat.total_get_requests == 0 ? 0 :
(double)stat.drop_get_requests /
(double)stat.total_get_requests *
100);
printf("Total SET Requests: %llu\n", stat.total_set_requests);
printf("Hit SET Requests: %llu (%.2f%%)\n", stat.hit_set_requests,
stat.total_set_requests == 0 ? 0 :
(double)stat.hit_set_requests /
(double)stat.total_set_requests *
100);
printf("Dropped SET Requests: %llu (%.2lf%%)\n", stat.drop_set_requests,
stat.total_set_requests == 0 ? 0 :
(double)stat.drop_set_requests /
(double)stat.total_set_requests *
100);
out:
close(fd);
free(percpu_stat);
return err;
}
/* Print the top-level usage to stderr. */
static void do_main_help(void)
{
	static const char usage[] =
		"Usage: %s OBJECT { COMMAND | help }\n"
		" OBJECT := { prog | stat }\n";

	fprintf(stderr, usage, elf_name);
}
/*
 * Entry point: remember the program name for the usage messages and
 * dispatch on the first argument ("prog" or "stat").
 */
int main(int argc, char *argv[])
{
	int remaining = argc - 1;
	char **rest = argv + 1;

	elf_name = argv[0];

	return dispatch_cmd(main_cmds, ARRAY_SIZE(main_cmds),
			    remaining, rest, do_main_help);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册