提交 4fe84359 编写于 作者: A Alexei Starovoitov 提交者: David S. Miller

bpf: convert htab map to hlist_nulls

when all map elements are pre-allocated one cpu can delete and reuse htab_elem
while another cpu is still walking the hlist. In such case the lookup may
miss the element. Convert hlist to hlist_nulls to avoid such scenario.
When bucket lock is taken there is no need to take such precautions,
so only convert map_lookup and map_get_next to nulls.
The race window is extremely small and only reproducible with explicit
udelay() inside lookup_nulls_elem_raw()

Similar to hlist add hlist_nulls_for_each_entry_safe() and
hlist_nulls_entry_safe() helpers.

Fixes: 6c905981 ("bpf: pre-allocate hash map elements")
Reported-by: NJonathan Perry <jonperry@fb.com>
Signed-off-by: NAlexei Starovoitov <ast@kernel.org>
Acked-by: NDaniel Borkmann <daniel@iogearbox.net>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 9f691549
master alk-4.19.24 alk-4.19.30 alk-4.19.34 alk-4.19.36 alk-4.19.43 alk-4.19.48 alk-4.19.57 ck-4.19.67 ck-4.19.81 ck-4.19.91 github/fork/deepanshu1422/fix-typo-in-comment github/fork/haosdent/fix-typo linux-next v4.19.91 v4.19.90 v4.19.89 v4.19.88 v4.19.87 v4.19.86 v4.19.85 v4.19.84 v4.19.83 v4.19.82 v4.19.81 v4.19.80 v4.19.79 v4.19.78 v4.19.77 v4.19.76 v4.19.75 v4.19.74 v4.19.73 v4.19.72 v4.19.71 v4.19.70 v4.19.69 v4.19.68 v4.19.67 v4.19.66 v4.19.65 v4.19.64 v4.19.63 v4.19.62 v4.19.61 v4.19.60 v4.19.59 v4.19.58 v4.19.57 v4.19.56 v4.19.55 v4.19.54 v4.19.53 v4.19.52 v4.19.51 v4.19.50 v4.19.49 v4.19.48 v4.19.47 v4.19.46 v4.19.45 v4.19.44 v4.19.43 v4.19.42 v4.19.41 v4.19.40 v4.19.39 v4.19.38 v4.19.37 v4.19.36 v4.19.35 v4.19.34 v4.19.33 v4.19.32 v4.19.31 v4.19.30 v4.19.29 v4.19.28 v4.19.27 v4.19.26 v4.19.25 v4.19.24 v4.19.23 v4.19.22 v4.19.21 v4.19.20 v4.19.19 v4.19.18 v4.19.17 v4.19.16 v4.19.15 v4.19.14 v4.19.13 v4.19.12 v4.19.11 v4.19.10 v4.19.9 v4.19.8 v4.19.7 v4.19.6 v4.19.5 v4.19.4 v4.19.3 v4.19.2 v4.19.1 v4.19 v4.19-rc8 v4.19-rc7 v4.19-rc6 v4.19-rc5 v4.19-rc4 v4.19-rc3 v4.19-rc2 v4.19-rc1 ck-release-21 ck-release-20 ck-release-19.2 ck-release-19.1 ck-release-19 ck-release-18 ck-release-17.2 ck-release-17.1 ck-release-17 ck-release-16 ck-release-15.1 ck-release-15 ck-release-14 ck-release-13.2 ck-release-13 ck-release-12 ck-release-11 ck-release-10 ck-release-9 ck-release-7 alk-release-15 alk-release-14 alk-release-13.2 alk-release-13 alk-release-12 alk-release-11 alk-release-10 alk-release-9 alk-release-7
无相关合并请求
......@@ -29,6 +29,11 @@ struct hlist_nulls_node {
((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
#define hlist_nulls_entry_safe(ptr, type, member) \
({ typeof(ptr) ____ptr = (ptr); \
!is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
})
/**
* ptr_is_a_nulls - Test if a ptr is a nulls
* @ptr: ptr to be tested
......
......@@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
/**
* hlist_nulls_for_each_entry_safe -
* iterate over list of given type safe against removal of list entry
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_nulls_node to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_nulls_node within the struct.
*/
#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \
for (({barrier();}), \
pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \
(!is_a_nulls(pos)) && \
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \
pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
#endif
#endif
......@@ -13,11 +13,12 @@
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
struct bucket {
struct hlist_head head;
struct hlist_nulls_head head;
raw_spinlock_t lock;
};
......@@ -44,7 +45,7 @@ enum extra_elem_state {
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
union {
struct hlist_node hash_node;
struct hlist_nulls_node hash_node;
struct {
void *padding;
union {
......@@ -337,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
goto free_htab;
for (i = 0; i < htab->n_buckets; i++) {
INIT_HLIST_HEAD(&htab->buckets[i].head);
INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
raw_spin_lock_init(&htab->buckets[i].lock);
}
......@@ -377,28 +378,52 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
return &htab->buckets[hash & (htab->n_buckets - 1)];
}
static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
return &__select_bucket(htab, hash)->head;
}
static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
/* this lookup function can only be called with bucket lock taken */
static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
void *key, u32 key_size)
{
struct hlist_nulls_node *n;
struct htab_elem *l;
hlist_for_each_entry_rcu(l, head, hash_node)
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l->hash == hash && !memcmp(&l->key, key, key_size))
return l;
return NULL;
}
/* can be called without bucket lock. it will repeat the loop in
* the unlikely event when elements moved from one bucket into another
* while link list is being walked
*/
static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
u32 hash, void *key,
u32 key_size, u32 n_buckets)
{
struct hlist_nulls_node *n;
struct htab_elem *l;
again:
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l->hash == hash && !memcmp(&l->key, key, key_size))
return l;
if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
goto again;
return NULL;
}
/* Called from syscall or from eBPF program */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct hlist_head *head;
struct hlist_nulls_head *head;
struct htab_elem *l;
u32 hash, key_size;
......@@ -411,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
head = select_bucket(htab, hash);
l = lookup_elem_raw(head, hash, key, key_size);
l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
return l;
}
......@@ -444,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
{
struct bpf_htab *htab = (struct bpf_htab *)arg;
struct htab_elem *l, *tgt_l;
struct hlist_head *head;
struct htab_elem *l = NULL, *tgt_l;
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
unsigned long flags;
struct bucket *b;
......@@ -455,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
raw_spin_lock_irqsave(&b->lock, flags);
hlist_for_each_entry_rcu(l, head, hash_node)
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l == tgt_l) {
hlist_del_rcu(&l->hash_node);
hlist_nulls_del_rcu(&l->hash_node);
break;
}
......@@ -470,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct hlist_head *head;
struct hlist_nulls_head *head;
struct htab_elem *l, *next_l;
u32 hash, key_size;
int i;
......@@ -484,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
head = select_bucket(htab, hash);
/* lookup the key */
l = lookup_elem_raw(head, hash, key, key_size);
l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
if (!l) {
i = 0;
......@@ -492,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
}
/* key was found, get next key in the same bucket */
next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
struct htab_elem, hash_node);
if (next_l) {
......@@ -511,7 +537,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
head = select_bucket(htab, i);
/* pick first element in the bucket */
next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
struct htab_elem, hash_node);
if (next_l) {
/* if it's not empty, just return it */
......@@ -676,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
struct hlist_head *head;
struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
......@@ -715,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
/* add new element to the head of the list, so that
* concurrent search will find it before old elem
*/
hlist_add_head_rcu(&l_new->hash_node, head);
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
hlist_del_rcu(&l_old->hash_node);
hlist_nulls_del_rcu(&l_old->hash_node);
free_htab_elem(htab, l_old);
}
ret = 0;
......@@ -731,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new, *l_old = NULL;
struct hlist_head *head;
struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
......@@ -772,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
/* add new element to the head of the list, so that
* concurrent search will find it before old elem
*/
hlist_add_head_rcu(&l_new->hash_node, head);
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
bpf_lru_node_set_ref(&l_new->lru_node);
hlist_del_rcu(&l_old->hash_node);
hlist_nulls_del_rcu(&l_old->hash_node);
}
ret = 0;
......@@ -796,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
struct hlist_head *head;
struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
......@@ -835,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = PTR_ERR(l_new);
goto err;
}
hlist_add_head_rcu(&l_new->hash_node, head);
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
}
ret = 0;
err:
......@@ -849,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
struct hlist_head *head;
struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
......@@ -897,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
} else {
pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
value, onallcpus);
hlist_add_head_rcu(&l_new->hash_node, head);
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
l_new = NULL;
}
ret = 0;
......@@ -925,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct hlist_head *head;
struct hlist_nulls_head *head;
struct bucket *b;
struct htab_elem *l;
unsigned long flags;
......@@ -945,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
l = lookup_elem_raw(head, hash, key, key_size);
if (l) {
hlist_del_rcu(&l->hash_node);
hlist_nulls_del_rcu(&l->hash_node);
free_htab_elem(htab, l);
ret = 0;
}
......@@ -957,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct hlist_head *head;
struct hlist_nulls_head *head;
struct bucket *b;
struct htab_elem *l;
unsigned long flags;
......@@ -977,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
l = lookup_elem_raw(head, hash, key, key_size);
if (l) {
hlist_del_rcu(&l->hash_node);
hlist_nulls_del_rcu(&l->hash_node);
ret = 0;
}
......@@ -992,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab)
int i;
for (i = 0; i < htab->n_buckets; i++) {
struct hlist_head *head = select_bucket(htab, i);
struct hlist_node *n;
struct hlist_nulls_head *head = select_bucket(htab, i);
struct hlist_nulls_node *n;
struct htab_elem *l;
hlist_for_each_entry_safe(l, n, head, hash_node) {
hlist_del_rcu(&l->hash_node);
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
hlist_nulls_del_rcu(&l->hash_node);
if (l->state != HTAB_EXTRA_ELEM_USED)
htab_elem_free(htab, l);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部