// SPDX-License-Identifier: GPL-2.0
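/* xsk_buff_pool - buffer pool backing AF_XDP (XSK) sockets. The pool manages
 * the per-chunk xdp_buff_xsk heads of a umem, their DMA mappings, and the
 * fill and completion rings shared with user space.
 */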

#include <net/xsk_buff_pool.h>
#include <net/xdp_sock.h>
#include <net/xdp_sock_drv.h>

#include "xsk_queue.h"
#include "xdp_umem.h"
#include "xsk.h"

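/* Track sockets bound for Tx on this pool. The list is RCU protected so the
 * Tx path can walk it without taking xsk_tx_list_lock; additions and
 * removals below serialize on that lock.
 */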
void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
	unsigned long flags;

	if (!xs->tx)
		return;

	spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
	list_add_rcu(&xs->tx_list, &pool->xsk_tx_list);
	spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}

void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
	unsigned long flags;

	if (!xs->tx)
		return;

	spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
	list_del_rcu(&xs->tx_list);
	spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}

void xp_destroy(struct xsk_buff_pool *pool)
{
	if (!pool)
		return;

	kvfree(pool->heads);
	kvfree(pool);
}

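/* Allocate a pool for @umem and take over the fill and completion rings
 * created by the socket (xs->fq_tmp/cq_tmp). One xdp_buff_xsk head exists
 * per umem chunk; aligned pools pre-compute each head's address here, while
 * unaligned pools park all heads on free_heads and assign addresses at
 * allocation time.
 */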
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
						struct xdp_umem *umem)
{
	bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	struct xsk_buff_pool *pool;
	struct xdp_buff_xsk *xskb;
	u32 i, entries;

	entries = unaligned ? umem->chunks : 0;
	pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL);
	if (!pool)
		goto out;

	pool->heads = kvcalloc(umem->chunks, sizeof(*pool->heads), GFP_KERNEL);
	if (!pool->heads)
		goto out;

	pool->chunk_mask = ~((u64)umem->chunk_size - 1);
	pool->addrs_cnt = umem->size;
	pool->heads_cnt = umem->chunks;
	pool->free_heads_cnt = umem->chunks;
	pool->headroom = umem->headroom;
	pool->chunk_size = umem->chunk_size;
	pool->chunk_shift = ffs(umem->chunk_size) - 1;
	pool->unaligned = unaligned;
	pool->frame_len = umem->chunk_size - umem->headroom -
		XDP_PACKET_HEADROOM;
	pool->umem = umem;
	pool->addrs = umem->addrs;
	INIT_LIST_HEAD(&pool->free_list);
	INIT_LIST_HEAD(&pool->xsk_tx_list);
	spin_lock_init(&pool->xsk_tx_list_lock);
	spin_lock_init(&pool->cq_lock);
	refcount_set(&pool->users, 1);

	pool->fq = xs->fq_tmp;
	pool->cq = xs->cq_tmp;

	for (i = 0; i < pool->free_heads_cnt; i++) {
		xskb = &pool->heads[i];
		xskb->pool = pool;
		xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
		if (pool->unaligned)
			pool->free_heads[i] = xskb;
		else
			xp_init_xskb_addr(xskb, pool, i * pool->chunk_size);
	}

	return pool;

out:
	xp_destroy(pool);
	return NULL;
}

void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
{
	u32 i;

	for (i = 0; i < pool->heads_cnt; i++)
		pool->heads[i].xdp.rxq = rxq;
}
EXPORT_SYMBOL(xp_set_rxq_info);

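/* Tell the driver to drop its zero-copy state for this queue by issuing
 * XDP_SETUP_XSK_POOL with a NULL pool. Does nothing if zero-copy was never
 * enabled on the umem.
 */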
static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
{
	struct netdev_bpf bpf;
	int err;

	ASSERT_RTNL();

	if (pool->umem->zc) {
		bpf.command = XDP_SETUP_XSK_POOL;
		bpf.xsk.pool = NULL;
		bpf.xsk.queue_id = pool->queue_id;

		err = pool->netdev->netdev_ops->ndo_bpf(pool->netdev, &bpf);

		if (err)
			WARN(1, "Failed to disable zero-copy!\n");
	}
}

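/* Bind the pool to @queue_id of @netdev. Copy mode only registers the pool
 * at the queue id; zero-copy additionally requires ndo_bpf and
 * ndo_xsk_wakeup and hands the pool to the driver. Unless XDP_ZEROCOPY was
 * explicitly requested, failure to set up zero-copy falls back to copy mode
 * rather than failing the bind.
 */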
int xp_assign_dev(struct xsk_buff_pool *pool,
		  struct net_device *netdev, u16 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err = 0;

	ASSERT_RTNL();

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	if (xsk_get_pool_from_qid(netdev, queue_id))
		return -EBUSY;

	pool->netdev = netdev;
	pool->queue_id = queue_id;
	err = xsk_reg_pool_at_qid(netdev, pool, queue_id);
	if (err)
		return err;

	if (flags & XDP_USE_NEED_WAKEUP)
		pool->uses_need_wakeup = true;
	/* Tx needs to be explicitly woken up the first time.  Also
	 * for supporting drivers that do not implement this
	 * feature. They will always have to call sendto() or poll().
	 */
	pool->cached_need_wakeup = XDP_WAKEUP_TX;

	dev_hold(netdev);

	if (force_copy)
		/* For copy-mode, we are done. */
		return 0;

	if (!netdev->netdev_ops->ndo_bpf ||
	    !netdev->netdev_ops->ndo_xsk_wakeup) {
		err = -EOPNOTSUPP;
		goto err_unreg_pool;
	}

	bpf.command = XDP_SETUP_XSK_POOL;
	bpf.xsk.pool = pool;
	bpf.xsk.queue_id = queue_id;

	err = netdev->netdev_ops->ndo_bpf(netdev, &bpf);
	if (err)
		goto err_unreg_pool;

	if (!pool->dma_pages) {
		WARN(1, "Driver did not DMA map zero-copy buffers");
		err = -EINVAL;
		goto err_unreg_xsk;
	}
	pool->umem->zc = true;
	return 0;

err_unreg_xsk:
	xp_disable_drv_zc(pool);
err_unreg_pool:
	if (!force_zc)
		err = 0; /* fallback to copy mode */
	if (err) {
		xsk_clear_pool_at_qid(netdev, queue_id);
		dev_put(netdev);
	}
	return err;
}

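/* Bind an already set up pool to another queue id or device for the shared
 * umem case, inheriting the zero-copy and need_wakeup settings from the
 * original binding. The caller must have supplied its own fill and
 * completion rings.
 */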
int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
			 struct net_device *dev, u16 queue_id)
{
	u16 flags;

	/* One fill and completion ring required for each queue id. */
	if (!pool->fq || !pool->cq)
		return -EINVAL;

	flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY;
	if (pool->uses_need_wakeup)
		flags |= XDP_USE_NEED_WAKEUP;

	return xp_assign_dev(pool, dev, queue_id, flags);
}

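/* Undo xp_assign_dev(): disable driver zero-copy, unregister the pool from
 * the queue id and drop the netdev reference. Requires rtnl to be held.
 */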
void xp_clear_dev(struct xsk_buff_pool *pool)
{
	if (!pool->netdev)
		return;

	xp_disable_drv_zc(pool);
	xsk_clear_pool_at_qid(pool->netdev, pool->queue_id);
	dev_put(pool->netdev);
	pool->netdev = NULL;
}

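/* Final teardown, deferred to a workqueue so rtnl_lock can be taken: unbind
 * from the device, destroy the fill and completion rings and drop the umem
 * reference.
 */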
static void xp_release_deferred(struct work_struct *work)
{
	struct xsk_buff_pool *pool = container_of(work, struct xsk_buff_pool,
						  work);

	rtnl_lock();
	xp_clear_dev(pool);
	rtnl_unlock();

	if (pool->fq) {
		xskq_destroy(pool->fq);
		pool->fq = NULL;
	}

	if (pool->cq) {
		xskq_destroy(pool->cq);
		pool->cq = NULL;
	}

	xdp_put_umem(pool->umem, false);
	xp_destroy(pool);
}

void xp_get_pool(struct xsk_buff_pool *pool)
{
	refcount_inc(&pool->users);
}

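/* Drop a reference to the pool; the last put schedules the deferred
 * teardown. Returns true if this was the final reference.
 */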
bool xp_put_pool(struct xsk_buff_pool *pool)
{
	if (!pool)
		return false;

	if (refcount_dec_and_test(&pool->users)) {
		INIT_WORK(&pool->work, xp_release_deferred);
		schedule_work(&pool->work);
		return true;
	}

	return false;
}

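/* DMA mappings are kept in xsk_dma_map entries on the umem's xsk_dma_list,
 * one per netdev, so pools sharing a umem on the same device reuse a single
 * refcounted mapping instead of mapping the pages again.
 */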
static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool)
{
	struct xsk_dma_map *dma_map;

	list_for_each_entry(dma_map, &pool->umem->xsk_dma_list, list) {
		if (dma_map->netdev == pool->netdev)
			return dma_map;
	}

	return NULL;
}

static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_device *netdev,
					     u32 nr_pages, struct xdp_umem *umem)
{
	struct xsk_dma_map *dma_map;

	dma_map = kzalloc(sizeof(*dma_map), GFP_KERNEL);
	if (!dma_map)
		return NULL;

	dma_map->dma_pages = kvcalloc(nr_pages, sizeof(*dma_map->dma_pages), GFP_KERNEL);
	if (!dma_map->dma_pages) {
		kfree(dma_map);
		return NULL;
	}

	dma_map->netdev = netdev;
	dma_map->dev = dev;
	dma_map->dma_need_sync = false;
	dma_map->dma_pages_cnt = nr_pages;
	refcount_set(&dma_map->users, 1);
	list_add(&dma_map->list, &umem->xsk_dma_list);
	return dma_map;
}

static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
{
	list_del(&dma_map->list);
	kvfree(dma_map->dma_pages);
	kfree(dma_map);
}

static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
{
	dma_addr_t *dma;
	u32 i;

	for (i = 0; i < dma_map->dma_pages_cnt; i++) {
		dma = &dma_map->dma_pages[i];
		if (*dma) {
			dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
					     DMA_BIDIRECTIONAL, attrs);
			*dma = 0;
		}
	}

	xp_destroy_dma_map(dma_map);
}

void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
{
	struct xsk_dma_map *dma_map;

	if (pool->dma_pages_cnt == 0)
		return;

	dma_map = xp_find_dma_map(pool);
	if (!dma_map) {
		WARN(1, "Could not find dma_map for device");
		return;
	}

	if (!refcount_dec_and_test(&dma_map->users))
		return;

	__xp_dma_unmap(dma_map, attrs);
	kvfree(pool->dma_pages);
	pool->dma_pages_cnt = 0;
	pool->dev = NULL;
}
EXPORT_SYMBOL(xp_dma_unmap);

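/* For each page whose DMA address is immediately followed by the next
 * page's, set XSK_NEXT_PG_CONTIG_MASK in the stored address. The unaligned
 * allocation path uses this to accept buffers that straddle a page boundary
 * only when the pages are DMA-contiguous.
 */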
static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map)
{
	u32 i;

	for (i = 0; i < dma_map->dma_pages_cnt - 1; i++) {
		if (dma_map->dma_pages[i] + PAGE_SIZE == dma_map->dma_pages[i + 1])
			dma_map->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK;
		else
			dma_map->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK;
	}
}

static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map)
{
	pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL);
	if (!pool->dma_pages)
		return -ENOMEM;

	pool->dev = dma_map->dev;
	pool->dma_pages_cnt = dma_map->dma_pages_cnt;
	pool->dma_need_sync = dma_map->dma_need_sync;
	memcpy(pool->dma_pages, dma_map->dma_pages,
	       pool->dma_pages_cnt * sizeof(*pool->dma_pages));

	return 0;
}

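/* DMA map all umem pages for @dev, or take a reference on an existing
 * mapping if this umem is already mapped for pool->netdev. Aligned pools
 * also get each head's DMA address precomputed here.
 */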
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
	       unsigned long attrs, struct page **pages, u32 nr_pages)
{
	struct xsk_dma_map *dma_map;
	dma_addr_t dma;
	int err;
	u32 i;

	dma_map = xp_find_dma_map(pool);
	if (dma_map) {
		err = xp_init_dma_info(pool, dma_map);
		if (err)
			return err;

		refcount_inc(&dma_map->users);
		return 0;
	}

	dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem);
	if (!dma_map)
		return -ENOMEM;

	for (i = 0; i < dma_map->dma_pages_cnt; i++) {
		dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
					 DMA_BIDIRECTIONAL, attrs);
		if (dma_mapping_error(dev, dma)) {
			__xp_dma_unmap(dma_map, attrs);
			return -ENOMEM;
		}
		if (dma_need_sync(dev, dma))
			dma_map->dma_need_sync = true;
		dma_map->dma_pages[i] = dma;
	}

	if (pool->unaligned)
		xp_check_dma_contiguity(dma_map);
	else
		for (i = 0; i < pool->heads_cnt; i++) {
			struct xdp_buff_xsk *xskb = &pool->heads[i];

			xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
		}

	err = xp_init_dma_info(pool, dma_map);
	if (err) {
		__xp_dma_unmap(dma_map, attrs);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL(xp_dma_map);

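/* Fill ring address validation. Unaligned addresses carry an offset in
 * their upper bits and must describe a chunk that lies fully inside the
 * umem without crossing a non-contiguous page; aligned addresses are simply
 * truncated to the start of their chunk.
 */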
static bool xp_addr_crosses_non_contig_pg(struct xsk_buff_pool *pool,
					  u64 addr)
{
	return xp_desc_crosses_non_contig_pg(pool, addr, pool->chunk_size);
}

static bool xp_check_unaligned(struct xsk_buff_pool *pool, u64 *addr)
{
	*addr = xp_unaligned_extract_addr(*addr);
	if (*addr >= pool->addrs_cnt ||
	    *addr + pool->chunk_size > pool->addrs_cnt ||
	    xp_addr_crosses_non_contig_pg(pool, *addr))
		return false;
	return true;
}

static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr)
{
	*addr = xp_aligned_extract_addr(pool, *addr);
	return *addr < pool->addrs_cnt;
}

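/* Slow path allocation: pop one address from the fill ring, skipping and
 * counting invalid descriptors, and return an initialized xdp_buff_xsk, or
 * NULL if the ring is empty.
 */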
static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
{
	struct xdp_buff_xsk *xskb;
	u64 addr;
	bool ok;

	if (pool->free_heads_cnt == 0)
		return NULL;

	for (;;) {
		if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
			pool->fq->queue_empty_descs++;
			return NULL;
		}

		ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
		     xp_check_aligned(pool, &addr);
		if (!ok) {
			pool->fq->invalid_descs++;
			xskq_cons_release(pool->fq);
			continue;
		}
		break;
	}

	if (pool->unaligned) {
		xskb = pool->free_heads[--pool->free_heads_cnt];
		xp_init_xskb_addr(xskb, pool, addr);
		if (pool->dma_pages_cnt)
			xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
	} else {
		xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
	}

	xskq_cons_release(pool->fq);
	return xskb;
}

struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
{
	struct xdp_buff_xsk *xskb;

	if (!pool->free_list_cnt) {
		xskb = __xp_alloc(pool);
		if (!xskb)
			return NULL;
	} else {
		pool->free_list_cnt--;
		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
					free_list_node);
		list_del(&xskb->free_list_node);
	}

	xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
	xskb->xdp.data_meta = xskb->xdp.data;

	if (pool->dma_need_sync) {
		dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
						 pool->frame_len,
						 DMA_BIDIRECTIONAL);
	}
	return &xskb->xdp;
}
EXPORT_SYMBOL(xp_alloc);

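/* Batched variant of __xp_alloc() used by xp_alloc_batch(): consume up to
 * @max addresses from the fill ring in one pass, dropping invalid ones, and
 * return the number of buffers written to @xdp.
 */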
static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
	u32 i, cached_cons, nb_entries;

	if (max > pool->free_heads_cnt)
		max = pool->free_heads_cnt;
	max = xskq_cons_nb_entries(pool->fq, max);

	cached_cons = pool->fq->cached_cons;
	nb_entries = max;
	i = max;
	while (i--) {
		struct xdp_buff_xsk *xskb;
		u64 addr;
		bool ok;

		__xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr);

		ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
			xp_check_aligned(pool, &addr);
		if (unlikely(!ok)) {
			pool->fq->invalid_descs++;
			nb_entries--;
			continue;
		}

		if (pool->unaligned) {
			xskb = pool->free_heads[--pool->free_heads_cnt];
			xp_init_xskb_addr(xskb, pool, addr);
			if (pool->dma_pages_cnt)
				xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
		} else {
			xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
		}

		*xdp = &xskb->xdp;
		xdp++;
	}

	xskq_cons_release_n(pool->fq, max);
	return nb_entries;
}

static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries)
{
	struct xdp_buff_xsk *xskb;
	u32 i;

	nb_entries = min_t(u32, nb_entries, pool->free_list_cnt);

	i = nb_entries;
	while (i--) {
		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
		list_del(&xskb->free_list_node);

		*xdp = &xskb->xdp;
		xdp++;
	}
	pool->free_list_cnt -= nb_entries;

	return nb_entries;
}

u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
	u32 nb_entries1 = 0, nb_entries2;

	if (unlikely(pool->dma_need_sync)) {
		/* Slow path */
		*xdp = xp_alloc(pool);
		return !!*xdp;
	}

	if (unlikely(pool->free_list_cnt)) {
		nb_entries1 = xp_alloc_reused(pool, xdp, max);
		if (nb_entries1 == max)
			return nb_entries1;

		max -= nb_entries1;
		xdp += nb_entries1;
	}

	nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max);
	if (!nb_entries2)
		pool->fq->queue_empty_descs++;

	return nb_entries1 + nb_entries2;
}
EXPORT_SYMBOL(xp_alloc_batch);

bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
	if (pool->free_list_cnt >= count)
		return true;
	return xskq_cons_has_entries(pool->fq, count - pool->free_list_cnt);
}
EXPORT_SYMBOL(xp_can_alloc);

void xp_free(struct xdp_buff_xsk *xskb)
{
	xskb->pool->free_list_cnt++;
	list_add(&xskb->free_list_node, &xskb->pool->free_list);
}
EXPORT_SYMBOL(xp_free);

void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
	addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
	return pool->addrs + addr;
}
EXPORT_SYMBOL(xp_raw_get_data);

dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
{
	addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
	return (pool->dma_pages[addr >> PAGE_SHIFT] &
		~XSK_NEXT_PG_CONTIG_MASK) +
		(addr & ~PAGE_MASK);
}
EXPORT_SYMBOL(xp_raw_get_dma);

void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
{
	dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
				      xskb->pool->frame_len, DMA_BIDIRECTIONAL);
}
EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);

void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
				 size_t size)
{
	dma_sync_single_range_for_device(pool->dev, dma, 0,
					 size, DMA_BIDIRECTIONAL);
}
EXPORT_SYMBOL(xp_dma_sync_for_device_slow);