提交 76707127 编写于 作者: D David S. Miller

Merge branch 'bpf-lpm-delete'

Craig Gallek says:

====================
Implement delete for BPF LPM trie

This was previously left as a TODO.  Add the implementation and
extend the test to cover it.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -389,10 +389,84 @@ static int trie_update_elem(struct bpf_map *map,
return ret;
}
static int trie_delete_elem(struct bpf_map *map, void *key)
/* Called from syscall or from eBPF program */
static int trie_delete_elem(struct bpf_map *map, void *_key)
{
/* TODO */
return -ENOSYS;
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct bpf_lpm_trie_key *key = _key;
struct lpm_trie_node __rcu **trim;
struct lpm_trie_node *node;
unsigned long irq_flags;
unsigned int next_bit;
size_t matchlen = 0;
int ret = 0;
if (key->prefixlen > trie->max_prefixlen)
return -EINVAL;
raw_spin_lock_irqsave(&trie->lock, irq_flags);
/* Walk the tree looking for an exact key/length match and keeping
* track of where we could begin trimming the tree. The trim-point
* is the sub-tree along the walk consisting of only single-child
* intermediate nodes and ending at a leaf node that we want to
* remove.
*/
trim = &trie->root;
node = rcu_dereference_protected(
trie->root, lockdep_is_held(&trie->lock));
while (node) {
matchlen = longest_prefix_match(trie, node, key);
if (node->prefixlen != matchlen ||
node->prefixlen == key->prefixlen)
break;
next_bit = extract_bit(key->data, node->prefixlen);
/* If we hit a node that has more than one child or is a valid
* prefix itself, do not remove it. Reset the root of the trim
* path to its descendant on our path.
*/
if (!(node->flags & LPM_TREE_NODE_FLAG_IM) ||
(node->child[0] && node->child[1]))
trim = &node->child[next_bit];
node = rcu_dereference_protected(
node->child[next_bit], lockdep_is_held(&trie->lock));
}
if (!node || node->prefixlen != key->prefixlen ||
(node->flags & LPM_TREE_NODE_FLAG_IM)) {
ret = -ENOENT;
goto out;
}
trie->n_entries--;
/* If the node we are removing is not a leaf node, simply mark it
* as intermediate and we are done.
*/
if (rcu_access_pointer(node->child[0]) ||
rcu_access_pointer(node->child[1])) {
node->flags |= LPM_TREE_NODE_FLAG_IM;
goto out;
}
/* trim should now point to the slot holding the start of a path from
* zero or more intermediate nodes to our leaf node for deletion.
*/
while ((node = rcu_dereference_protected(
*trim, lockdep_is_held(&trie->lock)))) {
RCU_INIT_POINTER(*trim, NULL);
trim = rcu_access_pointer(node->child[0]) ?
&node->child[0] :
&node->child[1];
kfree_rcu(node, rcu);
}
out:
raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
return ret;
}
#define LPM_DATA_SIZE_MAX 256
......
......@@ -31,6 +31,10 @@ struct tlpm_node {
uint8_t key[];
};
static struct tlpm_node *tlpm_match(struct tlpm_node *list,
const uint8_t *key,
size_t n_bits);
static struct tlpm_node *tlpm_add(struct tlpm_node *list,
const uint8_t *key,
size_t n_bits)
......@@ -38,9 +42,17 @@ static struct tlpm_node *tlpm_add(struct tlpm_node *list,
struct tlpm_node *node;
size_t n;
n = (n_bits + 7) / 8;
/* 'overwrite' an equivalent entry if one already exists */
node = tlpm_match(list, key, n_bits);
if (node && node->n_bits == n_bits) {
memcpy(node->key, key, n);
return list;
}
/* add new entry with @key/@n_bits to @list and return new head */
n = (n_bits + 7) / 8;
node = malloc(sizeof(*node) + n);
assert(node);
......@@ -92,6 +104,34 @@ static struct tlpm_node *tlpm_match(struct tlpm_node *list,
return best;
}
/* Remove the entry for exactly @key/@n_bits from @list, if present, and
 * return the (possibly new) head of the list.  The list is returned
 * unchanged when tlpm_match() finds nothing or finds an entry whose
 * n_bits differs from the requested length.
 */
static struct tlpm_node *tlpm_delete(struct tlpm_node *list,
				     const uint8_t *key,
				     size_t n_bits)
{
	struct tlpm_node *victim = tlpm_match(list, key, n_bits);
	struct tlpm_node **link;

	/* only an entry of the very same length counts as a hit */
	if (victim == NULL || victim->n_bits != n_bits)
		return list;

	/* Walk the links rather than the nodes: the head pointer and every
	 * ->next field are handled alike, so unlinking the first element
	 * needs no special case.
	 */
	for (link = &list; *link != NULL; link = &(*link)->next) {
		if (*link != victim)
			continue;

		*link = victim->next;
		free(victim);
		return list;
	}

	/* victim came out of this list, so the walk must have found it */
	assert(0);
	return list;
}
static void test_lpm_basic(void)
{
struct tlpm_node *list = NULL, *t1, *t2;
......@@ -114,6 +154,13 @@ static void test_lpm_basic(void)
assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15));
assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16));
list = tlpm_delete(list, (uint8_t[]){ 0xff, 0xff }, 16);
assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
list = tlpm_delete(list, (uint8_t[]){ 0xff }, 8);
assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
tlpm_clear(list);
}
......@@ -158,7 +205,7 @@ static void test_lpm_order(void)
static void test_lpm_map(int keysize)
{
size_t i, j, n_matches, n_nodes, n_lookups;
size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
struct tlpm_node *t, *list = NULL;
struct bpf_lpm_trie_key *key;
uint8_t *data, *value;
......@@ -170,6 +217,7 @@ static void test_lpm_map(int keysize)
*/
n_matches = 0;
n_matches_after_delete = 0;
n_nodes = 1 << 8;
n_lookups = 1 << 16;
......@@ -223,15 +271,54 @@ static void test_lpm_map(int keysize)
}
}
/* Remove the first half of the elements in the tlpm and the
* corresponding nodes from the bpf-lpm. Then run the same
* large number of random lookups in both and make sure they match.
* Note: we need to count the number of nodes actually inserted
* since there may have been duplicates.
*/
for (i = 0, t = list; t; i++, t = t->next)
;
for (j = 0; j < i / 2; ++j) {
key->prefixlen = list->n_bits;
memcpy(key->data, list->key, keysize);
r = bpf_map_delete_elem(map, key);
assert(!r);
list = tlpm_delete(list, list->key, list->n_bits);
assert(list);
}
for (i = 0; i < n_lookups; ++i) {
for (j = 0; j < keysize; ++j)
data[j] = rand() & 0xff;
t = tlpm_match(list, data, 8 * keysize);
key->prefixlen = 8 * keysize;
memcpy(key->data, data, keysize);
r = bpf_map_lookup_elem(map, key, value);
assert(!r || errno == ENOENT);
assert(!t == !!r);
if (t) {
++n_matches_after_delete;
assert(t->n_bits == value[keysize]);
for (j = 0; j < t->n_bits; ++j)
assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
(value[j / 8] & (1 << (7 - j % 8))));
}
}
close(map);
tlpm_clear(list);
/* With 255 random nodes in the map, we are pretty likely to match
* something on every lookup. For statistics, use this:
*
* printf(" nodes: %zu\n"
* "lookups: %zu\n"
* "matches: %zu\n", n_nodes, n_lookups, n_matches);
* printf(" nodes: %zu\n"
* " lookups: %zu\n"
* " matches: %zu\n"
* "matches(delete): %zu\n",
* n_nodes, n_lookups, n_matches, n_matches_after_delete);
*/
}
......@@ -331,6 +418,108 @@ static void test_lpm_ipaddr(void)
close(map_fd_ipv6);
}
/* Exercise element deletion on an IPv4 LPM trie map: deleting a key that
 * was never inserted, a leaf, a leaf hanging off an intermediate node, the
 * root, and finally the last remaining entry.  After each successful delete
 * a /32 lookup checks which prefix (if any) now matches.
 */
static void test_lpm_delete(void)
{
	struct bpf_lpm_trie_key *key;
	size_t key_size;
	int map_fd;
	__u64 value;

	key_size = sizeof(*key) + sizeof(__u32);
	key = alloca(key_size);

	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
				key_size, sizeof(value),
				100, BPF_F_NO_PREALLOC);
	assert(map_fd >= 0);

	/* Add nodes:
	 * 192.168.0.0/16   (1)
	 * 192.168.0.0/24   (2)
	 * 192.168.128.0/24 (3)
	 * 192.168.1.0/24   (4)
	 *
	 *         (1)
	 *        /   \
	 *     (IM)    (3)
	 *    /   \
	 *   (2)  (4)
	 */
	value = 1;
	key->prefixlen = 16;
	inet_pton(AF_INET, "192.168.0.0", key->data);
	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);

	value = 2;
	key->prefixlen = 24;
	inet_pton(AF_INET, "192.168.0.0", key->data);
	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);

	value = 3;
	key->prefixlen = 24;
	inet_pton(AF_INET, "192.168.128.0", key->data);
	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);

	value = 4;
	key->prefixlen = 24;
	inet_pton(AF_INET, "192.168.1.0", key->data);
	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);

	/* look up and remove non-existent node: both must report ENOENT */
	key->prefixlen = 32;
	inet_pton(AF_INET, "10.0.0.1", key->data);
	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
		errno == ENOENT);
	assert(bpf_map_delete_elem(map_fd, key) == -1 &&
		errno == ENOENT);

	/* assert initial lookup */
	key->prefixlen = 32;
	inet_pton(AF_INET, "192.168.0.1", key->data);
	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
	assert(value == 2);

	/* remove leaf node */
	key->prefixlen = 24;
	inet_pton(AF_INET, "192.168.0.0", key->data);
	assert(bpf_map_delete_elem(map_fd, key) == 0);

	key->prefixlen = 32;
	inet_pton(AF_INET, "192.168.0.1", key->data);
	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
	assert(value == 1);

	/* remove leaf (and intermediary) node */
	key->prefixlen = 24;
	inet_pton(AF_INET, "192.168.1.0", key->data);
	assert(bpf_map_delete_elem(map_fd, key) == 0);

	key->prefixlen = 32;
	inet_pton(AF_INET, "192.168.1.1", key->data);
	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
	assert(value == 1);

	/* remove root node */
	key->prefixlen = 16;
	inet_pton(AF_INET, "192.168.0.0", key->data);
	assert(bpf_map_delete_elem(map_fd, key) == 0);

	key->prefixlen = 32;
	inet_pton(AF_INET, "192.168.128.1", key->data);
	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
	assert(value == 3);

	/* remove last node */
	key->prefixlen = 24;
	inet_pton(AF_INET, "192.168.128.0", key->data);
	assert(bpf_map_delete_elem(map_fd, key) == 0);

	key->prefixlen = 32;
	inet_pton(AF_INET, "192.168.128.1", key->data);
	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
		errno == ENOENT);

	close(map_fd);
}
int main(void)
{
struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
......@@ -353,6 +542,8 @@ int main(void)
test_lpm_ipaddr();
test_lpm_delete();
printf("test_lpm: OK\n");
return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册