提交 843bd2b3 编写于 作者: D David S. Miller

Merge branch 'bpf-Improve-LRU-map-lookup-performance'

Martin KaFai Lau says:

====================
bpf: Improve LRU map lookup performance

This patchset improves the lookup performance of the LRU map.
Please see individual patch for details.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
...@@ -69,7 +69,8 @@ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node) ...@@ -69,7 +69,8 @@ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
/* ref is an approximation on access frequency. It does not /* ref is an approximation on access frequency. It does not
* have to be very accurate. Hence, no protection is used. * have to be very accurate. Hence, no protection is used.
*/ */
node->ref = 1; if (!node->ref)
node->ref = 1;
} }
int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
......
...@@ -514,6 +514,29 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) ...@@ -514,6 +514,29 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
return NULL; return NULL;
} }
static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
const int ref_reg = BPF_REG_1;
*insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
*insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
offsetof(struct htab_elem, lru_node) +
offsetof(struct bpf_lru_node, ref));
*insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1);
*insn++ = BPF_ST_MEM(BPF_B, ret,
offsetof(struct htab_elem, lru_node) +
offsetof(struct bpf_lru_node, ref),
1);
*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
offsetof(struct htab_elem, key) +
round_up(map->key_size, 8));
return insn - insn_buf;
}
/* It is called from the bpf_lru_list when the LRU needs to delete /* It is called from the bpf_lru_list when the LRU needs to delete
* older elements from the htab. * older elements from the htab.
*/ */
...@@ -1137,6 +1160,7 @@ const struct bpf_map_ops htab_lru_map_ops = { ...@@ -1137,6 +1160,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_lookup_elem = htab_lru_map_lookup_elem, .map_lookup_elem = htab_lru_map_lookup_elem,
.map_update_elem = htab_lru_map_update_elem, .map_update_elem = htab_lru_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem, .map_delete_elem = htab_lru_map_delete_elem,
.map_gen_lookup = htab_lru_map_gen_lookup,
}; };
/* Called from eBPF program */ /* Called from eBPF program */
......
...@@ -88,6 +88,13 @@ struct bpf_map_def SEC("maps") array_map = { ...@@ -88,6 +88,13 @@ struct bpf_map_def SEC("maps") array_map = {
.max_entries = MAX_ENTRIES, .max_entries = MAX_ENTRIES,
}; };
struct bpf_map_def SEC("maps") lru_hash_lookup_map = {
.type = BPF_MAP_TYPE_LRU_HASH,
.key_size = sizeof(u32),
.value_size = sizeof(long),
.max_entries = MAX_ENTRIES,
};
SEC("kprobe/sys_getuid") SEC("kprobe/sys_getuid")
int stress_hmap(struct pt_regs *ctx) int stress_hmap(struct pt_regs *ctx)
{ {
...@@ -148,12 +155,23 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) ...@@ -148,12 +155,23 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx)
SEC("kprobe/sys_connect") SEC("kprobe/sys_connect")
int stress_lru_hmap_alloc(struct pt_regs *ctx) int stress_lru_hmap_alloc(struct pt_regs *ctx)
{ {
char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn";
union {
u16 dst6[8];
struct {
u16 magic0;
u16 magic1;
u16 tcase;
u16 unused16;
u32 unused32;
u32 key;
};
} test_params;
struct sockaddr_in6 *in6; struct sockaddr_in6 *in6;
u16 test_case, dst6[8]; u16 test_case;
int addrlen, ret; int addrlen, ret;
char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n";
long val = 1; long val = 1;
u32 key = bpf_get_prandom_u32(); u32 key = 0;
in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx); in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
addrlen = (int)PT_REGS_PARM3(ctx); addrlen = (int)PT_REGS_PARM3(ctx);
...@@ -161,14 +179,18 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx) ...@@ -161,14 +179,18 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
if (addrlen != sizeof(*in6)) if (addrlen != sizeof(*in6))
return 0; return 0;
ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr); ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6),
&in6->sin6_addr);
if (ret) if (ret)
goto done; goto done;
if (dst6[0] != 0xdead || dst6[1] != 0xbeef) if (test_params.magic0 != 0xdead ||
test_params.magic1 != 0xbeef)
return 0; return 0;
test_case = dst6[7]; test_case = test_params.tcase;
if (test_case != 3)
key = bpf_get_prandom_u32();
if (test_case == 0) { if (test_case == 0) {
ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY); ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
...@@ -188,6 +210,16 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx) ...@@ -188,6 +210,16 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
ret = bpf_map_update_elem(nolocal_lru_map, &key, &val, ret = bpf_map_update_elem(nolocal_lru_map, &key, &val,
BPF_ANY); BPF_ANY);
} else if (test_case == 3) {
u32 i;
key = test_params.key;
#pragma clang loop unroll(full)
for (i = 0; i < 32; i++) {
bpf_map_lookup_elem(&lru_hash_lookup_map, &key);
key++;
}
} else { } else {
ret = -EINVAL; ret = -EINVAL;
} }
......
...@@ -46,6 +46,7 @@ enum test_type { ...@@ -46,6 +46,7 @@ enum test_type {
HASH_LOOKUP, HASH_LOOKUP,
ARRAY_LOOKUP, ARRAY_LOOKUP,
INNER_LRU_HASH_PREALLOC, INNER_LRU_HASH_PREALLOC,
LRU_HASH_LOOKUP,
NR_TESTS, NR_TESTS,
}; };
...@@ -60,6 +61,7 @@ const char *test_map_names[NR_TESTS] = { ...@@ -60,6 +61,7 @@ const char *test_map_names[NR_TESTS] = {
[HASH_LOOKUP] = "hash_map", [HASH_LOOKUP] = "hash_map",
[ARRAY_LOOKUP] = "array_map", [ARRAY_LOOKUP] = "array_map",
[INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map", [INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map",
[LRU_HASH_LOOKUP] = "lru_hash_lookup_map",
}; };
static int test_flags = ~0; static int test_flags = ~0;
...@@ -67,6 +69,8 @@ static uint32_t num_map_entries; ...@@ -67,6 +69,8 @@ static uint32_t num_map_entries;
static uint32_t inner_lru_hash_size; static uint32_t inner_lru_hash_size;
static int inner_lru_hash_idx = -1; static int inner_lru_hash_idx = -1;
static int array_of_lru_hashs_idx = -1; static int array_of_lru_hashs_idx = -1;
static int lru_hash_lookup_idx = -1;
static int lru_hash_lookup_test_entries = 32;
static uint32_t max_cnt = 1000000; static uint32_t max_cnt = 1000000;
static int check_test_flags(enum test_type t) static int check_test_flags(enum test_type t)
...@@ -86,6 +90,32 @@ static void test_hash_prealloc(int cpu) ...@@ -86,6 +90,32 @@ static void test_hash_prealloc(int cpu)
cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
} }
static int pre_test_lru_hash_lookup(int tasks)
{
int fd = map_fd[lru_hash_lookup_idx];
uint32_t key;
long val = 1;
int ret;
if (num_map_entries > lru_hash_lookup_test_entries)
lru_hash_lookup_test_entries = num_map_entries;
/* Populate the lru_hash_map for LRU_HASH_LOOKUP perf test.
*
* It is fine that the user requests for a map with
* num_map_entries < 32 and some of the later lru hash lookup
* may return not found. For LRU map, we are not interested
* in such small map performance.
*/
for (key = 0; key < lru_hash_lookup_test_entries; key++) {
ret = bpf_map_update_elem(fd, &key, &val, BPF_NOEXIST);
if (ret)
return ret;
}
return 0;
}
static void do_test_lru(enum test_type test, int cpu) static void do_test_lru(enum test_type test, int cpu)
{ {
static int inner_lru_map_fds[MAX_NR_CPUS]; static int inner_lru_map_fds[MAX_NR_CPUS];
...@@ -135,13 +165,17 @@ static void do_test_lru(enum test_type test, int cpu) ...@@ -135,13 +165,17 @@ static void do_test_lru(enum test_type test, int cpu)
if (test == LRU_HASH_PREALLOC) { if (test == LRU_HASH_PREALLOC) {
test_name = "lru_hash_map_perf"; test_name = "lru_hash_map_perf";
in6.sin6_addr.s6_addr16[7] = 0; in6.sin6_addr.s6_addr16[2] = 0;
} else if (test == NOCOMMON_LRU_HASH_PREALLOC) { } else if (test == NOCOMMON_LRU_HASH_PREALLOC) {
test_name = "nocommon_lru_hash_map_perf"; test_name = "nocommon_lru_hash_map_perf";
in6.sin6_addr.s6_addr16[7] = 1; in6.sin6_addr.s6_addr16[2] = 1;
} else if (test == INNER_LRU_HASH_PREALLOC) { } else if (test == INNER_LRU_HASH_PREALLOC) {
test_name = "inner_lru_hash_map_perf"; test_name = "inner_lru_hash_map_perf";
in6.sin6_addr.s6_addr16[7] = 2; in6.sin6_addr.s6_addr16[2] = 2;
} else if (test == LRU_HASH_LOOKUP) {
test_name = "lru_hash_lookup_perf";
in6.sin6_addr.s6_addr16[2] = 3;
in6.sin6_addr.s6_addr32[3] = 0;
} else { } else {
assert(0); assert(0);
} }
...@@ -150,6 +184,11 @@ static void do_test_lru(enum test_type test, int cpu) ...@@ -150,6 +184,11 @@ static void do_test_lru(enum test_type test, int cpu)
for (i = 0; i < max_cnt; i++) { for (i = 0; i < max_cnt; i++) {
ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6)); ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6));
assert(ret == -1 && errno == EBADF); assert(ret == -1 && errno == EBADF);
if (in6.sin6_addr.s6_addr32[3] <
lru_hash_lookup_test_entries - 32)
in6.sin6_addr.s6_addr32[3] += 32;
else
in6.sin6_addr.s6_addr32[3] = 0;
} }
printf("%d:%s pre-alloc %lld events per sec\n", printf("%d:%s pre-alloc %lld events per sec\n",
cpu, test_name, cpu, test_name,
...@@ -171,6 +210,11 @@ static void test_inner_lru_hash_prealloc(int cpu) ...@@ -171,6 +210,11 @@ static void test_inner_lru_hash_prealloc(int cpu)
do_test_lru(INNER_LRU_HASH_PREALLOC, cpu); do_test_lru(INNER_LRU_HASH_PREALLOC, cpu);
} }
static void test_lru_hash_lookup(int cpu)
{
do_test_lru(LRU_HASH_LOOKUP, cpu);
}
static void test_percpu_hash_prealloc(int cpu) static void test_percpu_hash_prealloc(int cpu)
{ {
__u64 start_time; __u64 start_time;
...@@ -243,6 +287,11 @@ static void test_array_lookup(int cpu) ...@@ -243,6 +287,11 @@ static void test_array_lookup(int cpu)
cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
} }
typedef int (*pre_test_func)(int tasks);
const pre_test_func pre_test_funcs[] = {
[LRU_HASH_LOOKUP] = pre_test_lru_hash_lookup,
};
typedef void (*test_func)(int cpu); typedef void (*test_func)(int cpu);
const test_func test_funcs[] = { const test_func test_funcs[] = {
[HASH_PREALLOC] = test_hash_prealloc, [HASH_PREALLOC] = test_hash_prealloc,
...@@ -255,8 +304,25 @@ const test_func test_funcs[] = { ...@@ -255,8 +304,25 @@ const test_func test_funcs[] = {
[HASH_LOOKUP] = test_hash_lookup, [HASH_LOOKUP] = test_hash_lookup,
[ARRAY_LOOKUP] = test_array_lookup, [ARRAY_LOOKUP] = test_array_lookup,
[INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc, [INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc,
[LRU_HASH_LOOKUP] = test_lru_hash_lookup,
}; };
static int pre_test(int tasks)
{
int i;
for (i = 0; i < NR_TESTS; i++) {
if (pre_test_funcs[i] && check_test_flags(i)) {
int ret = pre_test_funcs[i](tasks);
if (ret)
return ret;
}
}
return 0;
}
static void loop(int cpu) static void loop(int cpu)
{ {
cpu_set_t cpuset; cpu_set_t cpuset;
...@@ -277,6 +343,8 @@ static void run_perf_test(int tasks) ...@@ -277,6 +343,8 @@ static void run_perf_test(int tasks)
pid_t pid[tasks]; pid_t pid[tasks];
int i; int i;
assert(!pre_test(tasks));
for (i = 0; i < tasks; i++) { for (i = 0; i < tasks; i++) {
pid[i] = fork(); pid[i] = fork();
if (pid[i] == 0) { if (pid[i] == 0) {
...@@ -344,6 +412,9 @@ static void fixup_map(struct bpf_map_data *map, int idx) ...@@ -344,6 +412,9 @@ static void fixup_map(struct bpf_map_data *map, int idx)
array_of_lru_hashs_idx = idx; array_of_lru_hashs_idx = idx;
} }
if (!strcmp("lru_hash_lookup_map", map->name))
lru_hash_lookup_idx = idx;
if (num_map_entries <= 0) if (num_map_entries <= 0)
return; return;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册