xdpsock_user.c 19.9 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
B
Björn Töpel 已提交
2
/* Copyright(c) 2017 - 2018 Intel Corporation. */
3

4
#include <asm/barrier.h>
5 6 7 8
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <linux/bpf.h>
9
#include <linux/compiler.h>
10 11 12
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/if_ether.h>
13 14
#include <locale.h>
#include <net/ethernet.h>
15
#include <net/if.h>
16 17
#include <poll.h>
#include <pthread.h>
18 19 20 21 22
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
23
#include <sys/mman.h>
24 25
#include <sys/resource.h>
#include <sys/socket.h>
26
#include <sys/types.h>
27 28 29
#include <time.h>
#include <unistd.h>

30 31
#include "libbpf.h"
#include "xsk.h"
32
#include "xdpsock.h"
33
#include <bpf/bpf.h>
34 35 36 37 38 39 40 41 42 43 44 45 46

#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif

47 48
#define NUM_FRAMES (4 * 1024)
#define BATCH_SIZE 64
49 50 51

#define DEBUG_HEXDUMP 0

B
Björn Töpel 已提交
52
typedef __u64 u64;
53 54 55 56 57 58 59 60 61 62 63
typedef __u32 u32;

static unsigned long prev_time;

enum benchmark_type {
	BENCH_RXDROP = 0,
	BENCH_TXONLY = 1,
	BENCH_L2FWD = 2,
};

static enum benchmark_type opt_bench = BENCH_RXDROP;
64
static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
65 66 67 68 69
static const char *opt_if = "";
static int opt_ifindex;
static int opt_queue;
static int opt_poll;
static int opt_interval = 1;
70
static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
71 72
static u32 opt_umem_flags;
static int opt_unaligned_chunks;
73
static int opt_mmap_flags;
74
static u32 opt_xdp_bind_flags;
75
static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
76 77
static int opt_timeout = 1000;
static bool opt_need_wakeup = true;
78 79
static u32 opt_num_xsks = 1;
static u32 prog_id;
80

/* A umem area together with its fill and completion rings. One umem is
 * shared by all sockets in this sample.
 */
struct xsk_umem_info {
	struct xsk_ring_prod fq;	/* fill queue: buffers given to the kernel for rx */
	struct xsk_ring_cons cq;	/* completion queue: finished tx buffers */
	struct xsk_umem *umem;
	void *buffer;			/* start of the mmap'ed packet memory */
};

/* Per-socket state: the rx/tx rings plus statistics counters that the
 * poller thread reads and dump_stats() reports.
 */
struct xsk_socket_info {
	struct xsk_ring_cons rx;
	struct xsk_ring_prod tx;
	struct xsk_umem_info *umem;
	struct xsk_socket *xsk;
	unsigned long rx_npkts;		/* total packets received */
	unsigned long tx_npkts;		/* total packets completed on tx */
	unsigned long prev_rx_npkts;	/* snapshot at last dump_stats() */
	unsigned long prev_tx_npkts;
	u32 outstanding_tx;		/* tx descriptors submitted but not yet completed */
};

static int num_socks;
struct xsk_socket_info *xsks[MAX_SOCKS];

/* Return a monotonic timestamp in nanoseconds. */
static unsigned long get_nsecs(void)
{
	struct timespec t;

	clock_gettime(CLOCK_MONOTONIC, &t);
	return (unsigned long)t.tv_sec * 1000000000UL + t.tv_nsec;
}

111
static void print_benchmark(bool running)
112
{
113
	const char *bench_str = "INVALID";
114

115 116 117 118 119 120
	if (opt_bench == BENCH_RXDROP)
		bench_str = "rxdrop";
	else if (opt_bench == BENCH_TXONLY)
		bench_str = "txonly";
	else if (opt_bench == BENCH_L2FWD)
		bench_str = "l2fwd";
121

122 123 124 125 126 127 128
	printf("%s:%d %s ", opt_if, opt_queue, bench_str);
	if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
		printf("xdp-skb ");
	else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
		printf("xdp-drv ");
	else
		printf("	");
129

130 131
	if (opt_poll)
		printf("poll() ");
132

133 134 135
	if (running) {
		printf("running...");
		fflush(stdout);
136 137 138
	}
}

/* Print per-socket rx/tx rates (packets per second over the interval
 * since the previous call) and cumulative packet counts. Called
 * periodically by the poller thread and once more on exit.
 */
static void dump_stats(void)
{
	unsigned long now = get_nsecs();
	long dt = now - prev_time;	/* interval length in nanoseconds */
	int i;

	prev_time = now;

	for (i = 0; i < num_socks && xsks[i]; i++) {
		/* %' thousands grouping relies on setlocale() in main() */
		char *fmt = "%-15s %'-11.0f %'-11lu\n";
		double rx_pps, tx_pps;

		rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
			 1000000000. / dt;
		tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
			 1000000000. / dt;

		printf("\n sock%d@", i);
		print_benchmark(false);
		printf("\n");

		/* header row also shows the interval length in seconds */
		printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
		       dt / 1000000000.);
		printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
		printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);

		/* snapshot counters for the next interval */
		xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
		xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
	}
}

170
static void *poller(void *arg)
171
{
172 173 174 175
	(void)arg;
	for (;;) {
		sleep(opt_interval);
		dump_stats();
176 177
	}

178
	return NULL;
179 180
}

181
static void remove_xdp_program(void)
182
{
183
	u32 curr_prog_id = 0;
184

185 186 187
	if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
		printf("bpf_get_link_xdp_id failed\n");
		exit(EXIT_FAILURE);
188
	}
189 190 191 192 193 194
	if (prog_id == curr_prog_id)
		bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
	else if (!curr_prog_id)
		printf("couldn't find a prog id on a given interface\n");
	else
		printf("program on interface changed, not removing\n");
195 196
}

/* Signal handler (SIGINT/SIGTERM/SIGABRT): print final statistics,
 * tear down every socket and the shared umem, detach the XDP program,
 * then exit successfully. The signal number itself is unused.
 */
static void int_exit(int sig)
{
	struct xsk_umem *umem = xsks[0]->umem->umem;
	int i;

	dump_stats();
	for (i = 0; i < num_socks; i++)
		xsk_socket__delete(xsks[i]->xsk);
	/* all sockets share one umem; delete it after the sockets */
	(void)xsk_umem__delete(umem);
	remove_xdp_program();

	exit(EXIT_SUCCESS);
}

/* Report an errno-style error with its source location, dump final
 * stats, detach the XDP program and terminate. Use via the
 * exit_with_error() macro below so file/function/line are filled in.
 */
static void __exit_with_error(int error, const char *file, const char *func,
			      int line)
{
	fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
		line, error, strerror(error));
	dump_stats();
	remove_xdp_program();
	exit(EXIT_FAILURE);
}

#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
						 __LINE__)
/* Canned 60-byte Ethernet/IPv4/UDP frame used by the txonly benchmark. */
static const char pkt_data[] =
	"\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
	"\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
	"\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
	"\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";

/* Exchange the source and destination MAC addresses of the Ethernet
 * header at the start of @data (used by the l2fwd benchmark).
 */
static void swap_mac_addresses(void *data)
{
	struct ether_header *eth = (struct ether_header *)data;
	struct ether_addr *src = (struct ether_addr *)&eth->ether_shost;
	struct ether_addr *dst = (struct ether_addr *)&eth->ether_dhost;
	struct ether_addr saved_src = *src;

	*src = *dst;
	*dst = saved_src;
}

/* Debug helper: hex-dump @length bytes of @pkt, 32 bytes per row, with
 * an "addr=<umem address>" prefix on each row and a printable-ASCII
 * column on the right. Compiled out (early return) unless DEBUG_HEXDUMP
 * is set to 1.
 */
static void hex_dump(void *pkt, size_t length, u64 addr)
{
	const unsigned char *address = (unsigned char *)pkt;
	const unsigned char *line = address;	/* start of the current row */
	size_t line_size = 32;			/* bytes per row */
	unsigned char c;
	char buf[32];				/* row prefix, "addr=..." */
	int i = 0;

	if (!DEBUG_HEXDUMP)
		return;

	sprintf(buf, "addr=%llu", addr);
	printf("length = %zu\n", length);
	printf("%s | ", buf);
	while (length-- > 0) {
		printf("%02X ", *address++);
		/* end of a full row, or the final (possibly short) row */
		if (!(++i % line_size) || (length == 0 && i % line_size)) {
			if (length == 0) {
				/* pad a short last row out to line_size */
				while (i++ % line_size)
					printf("__ ");
			}
			printf(" | ");	/* right close */
			/* ASCII rendering; non-printables become '.' */
			while (line < address) {
				c = *line++;
				printf("%c", (c < 33 || c == 255) ? 0x2E : c);
			}
			printf("\n");
			if (length > 0)
				printf("%s | ", buf);
		}
	}
	printf("\n");
}

277
static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
278
{
279 280
	memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
	       sizeof(pkt_data) - 1);
281 282 283
	return sizeof(pkt_data) - 1;
}

/* Register @buffer (of @size bytes) as an AF_XDP umem with default ring
 * sizes and the command-line selected frame size / umem flags. Exits on
 * any failure. Returns a heap-allocated xsk_umem_info owned by main().
 */
static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
{
	struct xsk_umem_info *umem;
	struct xsk_umem_config cfg = {
		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.frame_size = opt_xsk_frame_size,
		.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
		.flags = opt_umem_flags	/* e.g. XDP_UMEM_UNALIGNED_CHUNK_FLAG with -u */
	};
	int ret;

	umem = calloc(1, sizeof(*umem));
	if (!umem)
		exit_with_error(errno);

	ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
			       &cfg);
	if (ret)
		exit_with_error(-ret);	/* libbpf returns negative errno */

	umem->buffer = buffer;
	return umem;
}

/* Hand a full fill ring worth of frames to the kernel so it has buffers
 * to receive into. Frame n lives at byte offset n * opt_xsk_frame_size.
 */
static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
{
	u32 idx;
	int ret;
	int i;

	ret = xsk_ring_prod__reserve(&umem->fq,
				     XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
		exit_with_error(-ret);

	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++, idx++)
		*xsk_ring_prod__fill_addr(&umem->fq, idx) =
			i * opt_xsk_frame_size;

	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
}

/* Create one AF_XDP socket on opt_if/opt_queue bound to @umem, with rx
 * and/or tx rings as requested. When running multiple sockets (-M) the
 * default libbpf XDP program is inhibited because load_xdp_program()
 * attaches our own. Records the attached program's id in prog_id so
 * remove_xdp_program() can detach it later. Exits on failure.
 */
static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
						    bool rx, bool tx)
{
	struct xsk_socket_config cfg;
	struct xsk_socket_info *xsk;
	struct xsk_ring_cons *rxr;
	struct xsk_ring_prod *txr;
	int ret;

	xsk = calloc(1, sizeof(*xsk));
	if (!xsk)
		exit_with_error(errno);

	xsk->umem = umem;
	cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
	if (opt_num_xsks > 1)
		cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
	else
		cfg.libbpf_flags = 0;
	cfg.xdp_flags = opt_xdp_flags;
	cfg.bind_flags = opt_xdp_bind_flags;

	/* only wire up the rings the benchmark actually needs */
	rxr = rx ? &xsk->rx : NULL;
	txr = tx ? &xsk->tx : NULL;
	ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem,
				 rxr, txr, &cfg);
	if (ret)
		exit_with_error(-ret);

	ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
	if (ret)
		exit_with_error(-ret);

	return xsk;
}

/* Long option table for getopt_long(); each entry mirrors a short
 * option handled in parse_command_line().
 */
static struct option long_options[] = {
	{"rxdrop", no_argument, 0, 'r'},
	{"txonly", no_argument, 0, 't'},
	{"l2fwd", no_argument, 0, 'l'},
	{"interface", required_argument, 0, 'i'},
	{"queue", required_argument, 0, 'q'},
	{"poll", no_argument, 0, 'p'},
	{"xdp-skb", no_argument, 0, 'S'},
	{"xdp-native", no_argument, 0, 'N'},
	{"interval", required_argument, 0, 'n'},
	{"zero-copy", no_argument, 0, 'z'},
	{"copy", no_argument, 0, 'c'},
	{"frame-size", required_argument, 0, 'f'},
	{"no-need-wakeup", no_argument, 0, 'm'},
	{"unaligned", no_argument, 0, 'u'},
	{"shared-umem", no_argument, 0, 'M'},
	/* BUG FIX: 'F' is accepted in the optstring and handled in
	 * parse_command_line(), but the long form was missing here.
	 */
	{"force", no_argument, 0, 'F'},
	{0, 0, 0, 0}
};

static void usage(const char *prog)
{
	const char *str =
		"  Usage: %s [OPTIONS]\n"
		"  Options:\n"
		"  -r, --rxdrop		Discard all incoming packets (default)\n"
		"  -t, --txonly		Only send packets\n"
		"  -l, --l2fwd		MAC swap L2 forwarding\n"
		"  -i, --interface=n	Run on interface n\n"
		"  -q, --queue=n	Use queue n (default 0)\n"
		"  -p, --poll		Use poll syscall\n"
		"  -S, --xdp-skb=n	Use XDP skb-mod\n"
392
		"  -N, --xdp-native=n	Enforce XDP native mode\n"
393
		"  -n, --interval=n	Specify statistics update interval (default 1 sec).\n"
394 395
		"  -z, --zero-copy      Force zero-copy mode.\n"
		"  -c, --copy           Force copy mode.\n"
396
		"  -f, --frame-size=n   Set the frame size (must be a power of two, default is %d).\n"
397
		"  -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
398 399
		"  -f, --frame-size=n   Set the frame size (must be a power of two in aligned mode, default is %d).\n"
		"  -u, --unaligned	Enable unaligned chunk placement\n"
400
		"  -M, --shared-umem	Enable XDP_SHARED_UMEM\n"
401
		"\n";
402
	fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
403 404 405 406 407 408 409 410 411 412
	exit(EXIT_FAILURE);
}

/* Parse all command-line options into the opt_* globals, then validate
 * that the interface exists and that the frame size is a power of two
 * (unless unaligned chunk mode relaxes that requirement). Calls usage()
 * — which exits — on any invalid input.
 */
static void parse_command_line(int argc, char **argv)
{
	int option_index, c;

	opterr = 0;	/* we report bad options ourselves via usage() */

	for (;;) {
		/* NOTE(review): the optstring contains 's' but there is
		 * no case 's' below, so -s falls through to default and
		 * prints usage — looks like a leftover; confirm.
		 */
		c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:muM",
				long_options, &option_index);
		if (c == -1)
			break;

		switch (c) {
		case 'r':
			opt_bench = BENCH_RXDROP;
			break;
		case 't':
			opt_bench = BENCH_TXONLY;
			break;
		case 'l':
			opt_bench = BENCH_L2FWD;
			break;
		case 'i':
			opt_if = optarg;
			break;
		case 'q':
			opt_queue = atoi(optarg);
			break;
		case 'p':
			opt_poll = 1;
			break;
		case 'S':
			/* skb mode implies copy mode on bind */
			opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
			opt_xdp_bind_flags |= XDP_COPY;
			break;
		case 'N':
			opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
			break;
		case 'n':
			opt_interval = atoi(optarg);
			break;
		case 'z':
			opt_xdp_bind_flags |= XDP_ZEROCOPY;
			break;
		case 'c':
			opt_xdp_bind_flags |= XDP_COPY;
			break;
		case 'u':
			/* unaligned chunks require hugepage backing */
			opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
			opt_unaligned_chunks = 1;
			opt_mmap_flags = MAP_HUGETLB;
			break;
		case 'F':
			/* force: replace an already-attached XDP program */
			opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
			break;
		case 'f':
			opt_xsk_frame_size = atoi(optarg);
			break;
		case 'm':
			opt_need_wakeup = false;
			opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
			break;
		case 'M':
			opt_num_xsks = MAX_SOCKS;
			break;
		default:
			usage(basename(argv[0]));
		}
	}

	opt_ifindex = if_nametoindex(opt_if);
	if (!opt_ifindex) {
		fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
			opt_if);
		usage(basename(argv[0]));
	}

	/* power-of-two check only matters for aligned chunk mode */
	if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
	    !opt_unaligned_chunks) {
		fprintf(stderr, "--frame-size=%d is not a power of two\n",
			opt_xsk_frame_size);
		usage(basename(argv[0]));
	}
}

491
static void kick_tx(struct xsk_socket_info *xsk)
492 493 494
{
	int ret;

495
	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
496
	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
497
		return;
498
	exit_with_error(errno);
499 500
}

/* Reclaim up to BATCH_SIZE completed tx buffers from the completion
 * ring and recycle them into the fill ring so they can be received into
 * again (l2fwd forwards in place, so rx and tx share buffers).
 */
static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
				     struct pollfd *fds)
{
	struct xsk_umem_info *umem = xsk->umem;
	u32 idx_cq = 0, idx_fq = 0;
	unsigned int rcvd;
	size_t ndescs;

	if (!xsk->outstanding_tx)
		return;

	/* kick unconditionally in wakeup-disabled mode, otherwise only
	 * when the kernel asked for a wakeup
	 */
	if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
		kick_tx(xsk);

	ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
		xsk->outstanding_tx;

	/* re-add completed Tx buffers */
	rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq);
	if (rcvd > 0) {
		unsigned int i;
		int ret;

		/* spin until the fill ring has room for all rcvd entries */
		ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
		while (ret != rcvd) {
			if (ret < 0)
				exit_with_error(-ret);
			if (xsk_ring_prod__needs_wakeup(&umem->fq))
				ret = poll(fds, num_socks, opt_timeout);
			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
		}

		/* move buffer addresses: completion ring -> fill ring */
		for (i = 0; i < rcvd; i++)
			*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) =
				*xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);

		xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
		xsk->outstanding_tx -= rcvd;
		xsk->tx_npkts += rcvd;
	}
}

544
static inline void complete_tx_only(struct xsk_socket_info *xsk)
545 546
{
	unsigned int rcvd;
547
	u32 idx;
548 549 550 551

	if (!xsk->outstanding_tx)
		return;

552 553
	if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
		kick_tx(xsk);
554

555
	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
556
	if (rcvd > 0) {
557
		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
558 559 560 561 562
		xsk->outstanding_tx -= rcvd;
		xsk->tx_npkts += rcvd;
	}
}

/* Receive up to BATCH_SIZE packets on @xsk and immediately return their
 * buffers to the fill ring (optionally hex-dumping each packet). The
 * extract/add_offset dance preserves the base chunk address when
 * unaligned chunk mode encodes an offset in the upper bits.
 */
static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
{
	unsigned int rcvd, i;
	u32 idx_rx = 0, idx_fq = 0;
	int ret;

	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
	if (!rcvd) {
		/* nothing to read: kick the fill ring if the kernel asked */
		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
			ret = poll(fds, num_socks, opt_timeout);
		return;
	}

	/* spin until the fill ring can take back all rcvd buffers */
	ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
	while (ret != rcvd) {
		if (ret < 0)
			exit_with_error(-ret);
		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
			ret = poll(fds, num_socks, opt_timeout);
		ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
	}

	for (i = 0; i < rcvd; i++) {
		u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
		u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
		u64 orig = xsk_umem__extract_addr(addr);	/* base chunk addr */

		addr = xsk_umem__add_offset_to_addr(addr);	/* actual data addr */
		char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);

		hex_dump(pkt, len, addr);	/* no-op unless DEBUG_HEXDUMP */
		*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
	}

	xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
	xsk_ring_cons__release(&xsk->rx, rcvd);
	xsk->rx_npkts += rcvd;
}

static void rx_drop_all(void)
{
604
	struct pollfd fds[MAX_SOCKS] = {};
605
	int i, ret;
606 607

	for (i = 0; i < num_socks; i++) {
608
		fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
609 610 611 612 613
		fds[i].events = POLLIN;
	}

	for (;;) {
		if (opt_poll) {
614
			ret = poll(fds, num_socks, opt_timeout);
615 616 617 618 619
			if (ret <= 0)
				continue;
		}

		for (i = 0; i < num_socks; i++)
620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641
			rx_drop(xsks[i], fds);
	}
}

/* Submit one batch of BATCH_SIZE canned frames starting at frame number
 * @frame_nb, then reap completions. Skips the batch entirely if the tx
 * ring cannot take BATCH_SIZE descriptors.
 *
 * NOTE(review): frame_nb is passed by value, so the advance at the
 * bottom of the if-block never reaches the caller — tx_only_all() keeps
 * handing in the same frame number, i.e. the same BATCH_SIZE frames are
 * resent forever. Harmless while every frame holds identical pkt_data,
 * but confirm whether a pointer parameter was intended.
 *
 * NOTE(review): the address is computed with
 * XSK_UMEM__DEFAULT_FRAME_SHIFT regardless of opt_xsk_frame_size, so a
 * non-default -f value would address the wrong frames — verify.
 */
static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb)
{
	u32 idx;

	if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) {
		unsigned int i;

		for (i = 0; i < BATCH_SIZE; i++) {
			xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr	=
				(frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
			xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
				sizeof(pkt_data) - 1;
		}

		xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
		xsk->outstanding_tx += BATCH_SIZE;
		frame_nb += BATCH_SIZE;
		frame_nb %= NUM_FRAMES;
	}

	complete_tx_only(xsk);
}

647
static void tx_only_all(void)
648
{
649
	struct pollfd fds[MAX_SOCKS] = {};
650 651
	u32 frame_nb[MAX_SOCKS] = {};
	int i, ret;
652

653 654 655 656
	for (i = 0; i < num_socks; i++) {
		fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
		fds[0].events = POLLOUT;
	}
657 658 659

	for (;;) {
		if (opt_poll) {
660
			ret = poll(fds, num_socks, opt_timeout);
661 662 663
			if (ret <= 0)
				continue;

664
			if (!(fds[0].revents & POLLOUT))
665 666 667
				continue;
		}

668 669
		for (i = 0; i < num_socks; i++)
			tx_only(xsks[i], frame_nb[i]);
670 671 672
	}
}

/* Forward one batch: reclaim finished tx buffers, receive up to
 * BATCH_SIZE packets, swap their MAC addresses in place, and retransmit
 * the same buffers on the tx ring.
 */
static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
{
	unsigned int rcvd, i;
	u32 idx_rx = 0, idx_tx = 0;
	int ret;

	/* recycle buffers completed on a previous iteration first */
	complete_tx_l2fwd(xsk, fds);

	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
	if (!rcvd) {
		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
			ret = poll(fds, num_socks, opt_timeout);
		return;
	}

	/* spin until the tx ring can take all rcvd descriptors */
	ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
	while (ret != rcvd) {
		if (ret < 0)
			exit_with_error(-ret);
		if (xsk_ring_prod__needs_wakeup(&xsk->tx))
			kick_tx(xsk);
		ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
	}

	for (i = 0; i < rcvd; i++) {
		u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
		u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
		/* keep the untouched descriptor address for the tx side */
		u64 orig = addr;

		addr = xsk_umem__add_offset_to_addr(addr);
		char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);

		swap_mac_addresses(pkt);

		hex_dump(pkt, len, addr);	/* no-op unless DEBUG_HEXDUMP */
		xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig;
		xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
	}

	xsk_ring_prod__submit(&xsk->tx, rcvd);
	xsk_ring_cons__release(&xsk->rx, rcvd);

	xsk->rx_npkts += rcvd;
	xsk->outstanding_tx += rcvd;
}

static void l2fwd_all(void)
{
721
	struct pollfd fds[MAX_SOCKS] = {};
722 723 724 725 726 727
	int i, ret;

	for (i = 0; i < num_socks; i++) {
		fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
		fds[i].events = POLLOUT | POLLIN;
	}
728

729 730 731 732 733 734
	for (;;) {
		if (opt_poll) {
			ret = poll(fds, num_socks, opt_timeout);
			if (ret <= 0)
				continue;
		}
735

736 737
		for (i = 0; i < num_socks; i++)
			l2fwd(xsks[i], fds);
738 739 740
	}
}

/* Load the companion XDP object file ("<argv0>_kern.o") and attach its
 * program to opt_ifindex. Used only in shared-umem mode (-M), where the
 * default libbpf program is inhibited. Exits on any failure; *obj is
 * returned for enter_xsks_into_map().
 */
static void load_xdp_program(char **argv, struct bpf_object **obj)
{
	struct bpf_prog_load_attr prog_load_attr = {
		.prog_type      = BPF_PROG_TYPE_XDP,
	};
	char xdp_filename[256];
	int prog_fd;

	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
	prog_load_attr.file = xdp_filename;

	if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd))
		exit(EXIT_FAILURE);
	if (prog_fd < 0) {
		/* NOTE(review): prog_fd is negative here, so strerror()
		 * receives a negative value and prints "Unknown error";
		 * strerror(-prog_fd) was probably intended — confirm.
		 */
		fprintf(stderr, "ERROR: no program found: %s\n",
			strerror(prog_fd));
		exit(EXIT_FAILURE);
	}

	if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
		fprintf(stderr, "ERROR: link set xdp fd failed\n");
		exit(EXIT_FAILURE);
	}
}

/* Insert every socket fd into the XDP program's "xsks_map" so the
 * kernel program can redirect packets to the right socket per queue.
 * Exits on any failure.
 */
static void enter_xsks_into_map(struct bpf_object *obj)
{
	struct bpf_map *map;
	int i, xsks_map;

	map = bpf_object__find_map_by_name(obj, "xsks_map");
	xsks_map = bpf_map__fd(map);
	if (xsks_map < 0) {
		fprintf(stderr, "ERROR: no xsks map found: %s\n",
			strerror(xsks_map));
		exit(EXIT_FAILURE);
	}

	for (i = 0; i < num_socks; i++) {
		int key = i;
		int fd = xsk_socket__fd(xsks[i]->xsk);

		if (bpf_map_update_elem(xsks_map, &key, &fd, 0)) {
			fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
			exit(EXIT_FAILURE);
		}
	}
}

/* Set up the umem, sockets and (when needed) the custom XDP program,
 * start the statistics thread, then run the selected benchmark loop
 * forever; termination happens in int_exit() via signal.
 */
int main(int argc, char **argv)
{
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
	bool rx = false, tx = false;
	struct xsk_umem_info *umem;
	/* NOTE(review): obj is only initialized by load_xdp_program()
	 * when opt_num_xsks > 1; enter_xsks_into_map() is guarded by the
	 * same condition, so it is never read uninitialized.
	 */
	struct bpf_object *obj;
	pthread_t pt;
	int i, ret;
	void *bufs;

	parse_command_line(argc, argv);

	/* umem pages must be lockable; lift the MEMLOCK limit */
	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
			strerror(errno));
		exit(EXIT_FAILURE);
	}

	if (opt_num_xsks > 1)
		load_xdp_program(argv, &obj);

	/* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
	bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
		    PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0);
	if (bufs == MAP_FAILED) {
		printf("ERROR: mmap failed\n");
		exit(EXIT_FAILURE);
	}

	/* Create sockets... */
	umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
	if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) {
		rx = true;
		xsk_populate_fill_ring(umem);
	}
	if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY)
		tx = true;
	for (i = 0; i < opt_num_xsks; i++)
		xsks[num_socks++] = xsk_configure_socket(umem, rx, tx);

	/* pre-fill every frame with the canned packet for txonly */
	if (opt_bench == BENCH_TXONLY)
		for (i = 0; i < NUM_FRAMES; i++)
			gen_eth_frame(umem, i * opt_xsk_frame_size);

	if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY)
		enter_xsks_into_map(obj);

	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);
	signal(SIGABRT, int_exit);

	/* enable %' thousands grouping used by dump_stats() */
	setlocale(LC_ALL, "");

	ret = pthread_create(&pt, NULL, poller, NULL);
	if (ret)
		exit_with_error(ret);

	prev_time = get_nsecs();

	/* each of these loops runs until a signal terminates the process */
	if (opt_bench == BENCH_RXDROP)
		rx_drop_all();
	else if (opt_bench == BENCH_TXONLY)
		tx_only_all();
	else
		l2fwd_all();

	return 0;
}