提交 4bfc4714 编写于 作者: David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2020-12-28

The following pull-request contains BPF updates for your *net* tree.

There is a small merge conflict between bpf tree commit 69ca310f
("bpf: Save correct stopping point in file seq iteration") and net tree
commit 66ed5944 ("bpf/task_iter: In task_file_seq_get_next use
task_lookup_next_fd_rcu"). The get_files_struct() does not exist anymore
in net, so take the hunk in HEAD and add the `info->tid = curr_tid` to
the error path:

  [...]
                curr_task = task_seq_get_next(ns, &curr_tid, true);
                if (!curr_task) {
                        info->task = NULL;
                        info->tid = curr_tid;
                        return NULL;
                }

                /* set info->task and info->tid */
  [...]

We've added 10 non-merge commits during the last 9 day(s) which contain
a total of 11 files changed, 75 insertions(+), 20 deletions(-).

The main changes are:

1) Various AF_XDP fixes, such as fixing a fill/completion ring leak on failed
   bind and a race in skb mode's backpressure mechanism, from Magnus Karlsson.

2) Fix latency spikes on lockdep enabled kernels by adding a rescheduling
   point to BPF hashtab initialization, from Eric Dumazet.

3) Fix a splat in task iterator by saving the correct stopping point in the
   seq file iteration, from Jonathan Lemon.

4) Fix BPF maps selftest by adding retries in case hashtab returns EBUSY
   errors on update/deletes, from Andrii Nakryiko.

5) Fix BPF selftest error reporting to something more user friendly if the
   vmlinux BTF cannot be found, from Kamal Mostafa.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
...@@ -58,10 +58,6 @@ struct xdp_sock { ...@@ -58,10 +58,6 @@ struct xdp_sock {
struct xsk_queue *tx ____cacheline_aligned_in_smp; struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head tx_list; struct list_head tx_list;
/* Mutual exclusion of NAPI TX thread and sendmsg error paths
* in the SKB destructor callback.
*/
spinlock_t tx_completion_lock;
/* Protects generic receive. */ /* Protects generic receive. */
spinlock_t rx_lock; spinlock_t rx_lock;
......
...@@ -73,6 +73,11 @@ struct xsk_buff_pool { ...@@ -73,6 +73,11 @@ struct xsk_buff_pool {
bool dma_need_sync; bool dma_need_sync;
bool unaligned; bool unaligned;
void *addrs; void *addrs;
/* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
* NAPI TX thread and sendmsg error paths in the SKB destructor callback and when
* sockets share a single cq when the same netdev and queue id is shared.
*/
spinlock_t cq_lock;
struct xdp_buff_xsk *free_heads[]; struct xdp_buff_xsk *free_heads[];
}; };
......
...@@ -152,6 +152,7 @@ static void htab_init_buckets(struct bpf_htab *htab) ...@@ -152,6 +152,7 @@ static void htab_init_buckets(struct bpf_htab *htab)
lockdep_set_class(&htab->buckets[i].lock, lockdep_set_class(&htab->buckets[i].lock,
&htab->lockdep_key); &htab->lockdep_key);
} }
cond_resched();
} }
} }
......
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/license.h> #include <linux/license.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/cred.h> #include <linux/cred.h>
......
...@@ -37,7 +37,7 @@ static struct task_struct *task_seq_get_next(struct pid_namespace *ns, ...@@ -37,7 +37,7 @@ static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
if (!task) { if (!task) {
++*tid; ++*tid;
goto retry; goto retry;
} else if (skip_if_dup_files && task->tgid != task->pid && } else if (skip_if_dup_files && !thread_group_leader(task) &&
task->files == task->group_leader->files) { task->files == task->group_leader->files) {
put_task_struct(task); put_task_struct(task);
task = NULL; task = NULL;
...@@ -151,14 +151,14 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) ...@@ -151,14 +151,14 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
curr_task = info->task; curr_task = info->task;
curr_fd = info->fd; curr_fd = info->fd;
} else { } else {
curr_task = task_seq_get_next(ns, &curr_tid, true); curr_task = task_seq_get_next(ns, &curr_tid, true);
if (!curr_task) { if (!curr_task) {
info->task = NULL; info->task = NULL;
return NULL; info->tid = curr_tid;
} return NULL;
}
/* set info->task and info->tid */
info->task = curr_task; /* set info->task and info->tid */
if (curr_tid == info->tid) { if (curr_tid == info->tid) {
curr_fd = info->fd; curr_fd = info->fd;
} else { } else {
......
...@@ -423,9 +423,9 @@ static void xsk_destruct_skb(struct sk_buff *skb) ...@@ -423,9 +423,9 @@ static void xsk_destruct_skb(struct sk_buff *skb)
struct xdp_sock *xs = xdp_sk(skb->sk); struct xdp_sock *xs = xdp_sk(skb->sk);
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&xs->tx_completion_lock, flags); spin_lock_irqsave(&xs->pool->cq_lock, flags);
xskq_prod_submit_addr(xs->pool->cq, addr); xskq_prod_submit_addr(xs->pool->cq, addr);
spin_unlock_irqrestore(&xs->tx_completion_lock, flags); spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
sock_wfree(skb); sock_wfree(skb);
} }
...@@ -437,6 +437,7 @@ static int xsk_generic_xmit(struct sock *sk) ...@@ -437,6 +437,7 @@ static int xsk_generic_xmit(struct sock *sk)
bool sent_frame = false; bool sent_frame = false;
struct xdp_desc desc; struct xdp_desc desc;
struct sk_buff *skb; struct sk_buff *skb;
unsigned long flags;
int err = 0; int err = 0;
mutex_lock(&xs->mutex); mutex_lock(&xs->mutex);
...@@ -468,10 +469,13 @@ static int xsk_generic_xmit(struct sock *sk) ...@@ -468,10 +469,13 @@ static int xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement * if there is space in it. This avoids having to implement
* any buffering in the Tx path. * any buffering in the Tx path.
*/ */
spin_lock_irqsave(&xs->pool->cq_lock, flags);
if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) { if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
kfree_skb(skb); kfree_skb(skb);
goto out; goto out;
} }
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
skb->dev = xs->dev; skb->dev = xs->dev;
skb->priority = sk->sk_priority; skb->priority = sk->sk_priority;
...@@ -483,6 +487,9 @@ static int xsk_generic_xmit(struct sock *sk) ...@@ -483,6 +487,9 @@ static int xsk_generic_xmit(struct sock *sk)
if (err == NETDEV_TX_BUSY) { if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */ /* Tell user-space to retry the send */
skb->destructor = sock_wfree; skb->destructor = sock_wfree;
spin_lock_irqsave(&xs->pool->cq_lock, flags);
xskq_prod_cancel(xs->pool->cq);
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
/* Free skb without triggering the perf drop trace */ /* Free skb without triggering the perf drop trace */
consume_skb(skb); consume_skb(skb);
err = -EAGAIN; err = -EAGAIN;
...@@ -878,6 +885,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -878,6 +885,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
} }
} }
/* FQ and CQ are now owned by the buffer pool and cleaned up with it. */
xs->fq_tmp = NULL;
xs->cq_tmp = NULL;
xs->dev = dev; xs->dev = dev;
xs->zc = xs->umem->zc; xs->zc = xs->umem->zc;
xs->queue_id = qid; xs->queue_id = qid;
...@@ -1299,7 +1310,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, ...@@ -1299,7 +1310,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs->state = XSK_READY; xs->state = XSK_READY;
mutex_init(&xs->mutex); mutex_init(&xs->mutex);
spin_lock_init(&xs->rx_lock); spin_lock_init(&xs->rx_lock);
spin_lock_init(&xs->tx_completion_lock);
INIT_LIST_HEAD(&xs->map_list); INIT_LIST_HEAD(&xs->map_list);
spin_lock_init(&xs->map_list_lock); spin_lock_init(&xs->map_list_lock);
......
...@@ -71,12 +71,11 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, ...@@ -71,12 +71,11 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xsk_tx_list); INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock); spin_lock_init(&pool->xsk_tx_list_lock);
spin_lock_init(&pool->cq_lock);
refcount_set(&pool->users, 1); refcount_set(&pool->users, 1);
pool->fq = xs->fq_tmp; pool->fq = xs->fq_tmp;
pool->cq = xs->cq_tmp; pool->cq = xs->cq_tmp;
xs->fq_tmp = NULL;
xs->cq_tmp = NULL;
for (i = 0; i < pool->free_heads_cnt; i++) { for (i = 0; i < pool->free_heads_cnt; i++) {
xskb = &pool->heads[i]; xskb = &pool->heads[i];
......
...@@ -334,6 +334,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q) ...@@ -334,6 +334,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
return xskq_prod_nb_free(q, 1) ? false : true; return xskq_prod_nb_free(q, 1) ? false : true;
} }
/*
 * Step the queue's cached producer index back by one.
 *
 * NOTE(review): presumably the inverse of a successful xskq_prod_reserve();
 * its body is not fully visible here, so confirm before relying on that.
 */
static inline void xskq_prod_cancel(struct xsk_queue *q)
{
	q->cached_prod -= 1;
}
static inline int xskq_prod_reserve(struct xsk_queue *q) static inline int xskq_prod_reserve(struct xsk_queue *q)
{ {
if (xskq_prod_is_full(q)) if (xskq_prod_is_full(q))
......
...@@ -121,6 +121,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ ...@@ -121,6 +121,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
/sys/kernel/btf/vmlinux \ /sys/kernel/btf/vmlinux \
/boot/vmlinux-$(shell uname -r) /boot/vmlinux-$(shell uname -r)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
ifeq ($(VMLINUX_BTF),)
$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
endif
# Define simple and short `make test_progs`, `make test_sysctl`, etc targets # Define simple and short `make test_progs`, `make test_sysctl`, etc targets
# to build individual tests. # to build individual tests.
......
...@@ -1312,22 +1312,58 @@ static void test_map_stress(void) ...@@ -1312,22 +1312,58 @@ static void test_map_stress(void)
#define DO_UPDATE 1 #define DO_UPDATE 1
#define DO_DELETE 0 #define DO_DELETE 0
#define MAP_RETRIES 20
/*
 * Update a map element, retrying while the call fails with EAGAIN or EBUSY.
 *
 * After each retriable failure the helper sleeps for 1 microsecond and
 * decrements @attempts; it gives up once @attempts has reached zero.
 *
 * Returns 0 as soon as bpf_map_update_elem() succeeds, otherwise -errno of
 * the failing call (non-retriable error, or no attempts left).
 */
static int map_update_retriable(int map_fd, const void *key, const void *value,
				int flags, int attempts)
{
	for (;;) {
		if (bpf_map_update_elem(map_fd, key, value, flags) == 0)
			return 0;

		/* Retry budget exhausted: report the last error. */
		if (attempts == 0)
			return -errno;

		/* Anything other than EAGAIN/EBUSY is treated as final. */
		if (errno != EAGAIN && errno != EBUSY)
			return -errno;

		usleep(1);
		attempts--;
	}
}
/*
 * Delete a map element, retrying while the call fails with EAGAIN or EBUSY.
 *
 * After each retriable failure the helper sleeps for 1 microsecond and
 * decrements @attempts; it gives up once @attempts has reached zero.
 *
 * Returns 0 as soon as bpf_map_delete_elem() succeeds, otherwise -errno of
 * the failing call (non-retriable error, or no attempts left).
 */
static int map_delete_retriable(int map_fd, const void *key, int attempts)
{
	for (;;) {
		if (bpf_map_delete_elem(map_fd, key) == 0)
			return 0;

		/* Retry budget exhausted: report the last error. */
		if (attempts == 0)
			return -errno;

		/* Anything other than EAGAIN/EBUSY is treated as final. */
		if (errno != EAGAIN && errno != EBUSY)
			return -errno;

		usleep(1);
		attempts--;
	}
}
static void test_update_delete(unsigned int fn, void *data) static void test_update_delete(unsigned int fn, void *data)
{ {
int do_update = ((int *)data)[1]; int do_update = ((int *)data)[1];
int fd = ((int *)data)[0]; int fd = ((int *)data)[0];
int i, key, value; int i, key, value, err;
for (i = fn; i < MAP_SIZE; i += TASKS) { for (i = fn; i < MAP_SIZE; i += TASKS) {
key = value = i; key = value = i;
if (do_update) { if (do_update) {
assert(bpf_map_update_elem(fd, &key, &value, err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
BPF_NOEXIST) == 0); if (err)
assert(bpf_map_update_elem(fd, &key, &value, printf("error %d %d\n", err, errno);
BPF_EXIST) == 0); assert(err == 0);
err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
} else { } else {
assert(bpf_map_delete_elem(fd, &key) == 0); err = map_delete_retriable(fd, &key, MAP_RETRIES);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
} }
} }
} }
......
...@@ -715,7 +715,7 @@ static void worker_pkt_dump(void) ...@@ -715,7 +715,7 @@ static void worker_pkt_dump(void)
int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE)); int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
if (payload == EOT) { if (payload == EOT) {
ksft_print_msg("End-of-tranmission frame received\n"); ksft_print_msg("End-of-transmission frame received\n");
fprintf(stdout, "---------------------------------------\n"); fprintf(stdout, "---------------------------------------\n");
break; break;
} }
...@@ -747,7 +747,7 @@ static void worker_pkt_validate(void) ...@@ -747,7 +747,7 @@ static void worker_pkt_validate(void)
} }
if (payloadseqnum == EOT) { if (payloadseqnum == EOT) {
ksft_print_msg("End-of-tranmission frame received: PASS\n"); ksft_print_msg("End-of-transmission frame received: PASS\n");
sigvar = 1; sigvar = 1;
break; break;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册