xdpsock_user.c
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017 - 2018 Intel Corporation. */

#include <asm/barrier.h>
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <linux/bpf.h>
#include <linux/compiler.h>
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/if_ether.h>
#include <locale.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include "libbpf.h"
#include "xsk.h"
#include "xdpsock.h"
#include <bpf/bpf.h>

#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif

#define NUM_FRAMES (4 * 1024)
#define BATCH_SIZE 64

#define DEBUG_HEXDUMP 0

typedef __u64 u64;
typedef __u32 u32;

static unsigned long prev_time;

enum benchmark_type {
	BENCH_RXDROP = 0,
	BENCH_TXONLY = 1,
	BENCH_L2FWD = 2,
};

static enum benchmark_type opt_bench = BENCH_RXDROP;
static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static const char *opt_if = "";
static int opt_ifindex;
static int opt_queue;
static int opt_poll;
static int opt_interval = 1;
static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
static u32 opt_umem_flags;
static int opt_unaligned_chunks;
static int opt_mmap_flags;
static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
static int opt_timeout = 1000;
static bool opt_need_wakeup = true;
static u32 opt_num_xsks = 1;
static u32 prog_id;
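
/*
 * Per-benchmark bookkeeping: the UMEM owns the fill (fq) and completion (cq)
 * rings plus the shared packet buffer area, while each AF_XDP socket owns its
 * own RX/TX rings, packet counters and the count of TX descriptors that are
 * still awaiting completion.
 */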

struct xsk_umem_info {
	struct xsk_ring_prod fq;
	struct xsk_ring_cons cq;
	struct xsk_umem *umem;
	void *buffer;
};

struct xsk_socket_info {
	struct xsk_ring_cons rx;
	struct xsk_ring_prod tx;
	struct xsk_umem_info *umem;
	struct xsk_socket *xsk;
	unsigned long rx_npkts;
	unsigned long tx_npkts;
	unsigned long prev_rx_npkts;
	unsigned long prev_tx_npkts;
	u32 outstanding_tx;
};

static int num_socks;
struct xsk_socket_info *xsks[MAX_SOCKS];

static unsigned long get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000UL + ts.tv_nsec;
}

static void print_benchmark(bool running)
{
	const char *bench_str = "INVALID";

	if (opt_bench == BENCH_RXDROP)
		bench_str = "rxdrop";
	else if (opt_bench == BENCH_TXONLY)
		bench_str = "txonly";
	else if (opt_bench == BENCH_L2FWD)
		bench_str = "l2fwd";

	printf("%s:%d %s ", opt_if, opt_queue, bench_str);
	if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
		printf("xdp-skb ");
	else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
		printf("xdp-drv ");
	else
		printf("	");

	if (opt_poll)
		printf("poll() ");

	if (running) {
		printf("running...");
		fflush(stdout);
	}
}

static void dump_stats(void)
{
	unsigned long now = get_nsecs();
	long dt = now - prev_time;
	int i;

	prev_time = now;

	for (i = 0; i < num_socks && xsks[i]; i++) {
		char *fmt = "%-15s %'-11.0f %'-11lu\n";
		double rx_pps, tx_pps;

		rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
			 1000000000. / dt;
		tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
			 1000000000. / dt;

		printf("\n sock%d@", i);
		print_benchmark(false);
		printf("\n");

		printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
		       dt / 1000000000.);
		printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
		printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);

		xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
		xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
	}
}

static void *poller(void *arg)
{
	(void)arg;
	for (;;) {
		sleep(opt_interval);
		dump_stats();
	}

	return NULL;
}

static void remove_xdp_program(void)
{
	u32 curr_prog_id = 0;

	if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
		printf("bpf_get_link_xdp_id failed\n");
		exit(EXIT_FAILURE);
	}
	if (prog_id == curr_prog_id)
		bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
	else if (!curr_prog_id)
		printf("couldn't find a prog id on a given interface\n");
	else
		printf("program on interface changed, not removing\n");
}

static void int_exit(int sig)
{
	struct xsk_umem *umem = xsks[0]->umem->umem;
	int i;

	dump_stats();
	for (i = 0; i < num_socks; i++)
		xsk_socket__delete(xsks[i]->xsk);
	(void)xsk_umem__delete(umem);
	remove_xdp_program();

	exit(EXIT_SUCCESS);
}

static void __exit_with_error(int error, const char *file, const char *func,
			      int line)
{
	fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
		line, error, strerror(error));
	dump_stats();
	remove_xdp_program();
	exit(EXIT_FAILURE);
}

#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
						 __LINE__)
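
/*
 * Canned 60-byte Ethernet/IPv4/UDP frame used by the txonly benchmark;
 * gen_eth_frame() below copies it into every UMEM frame at startup.
 */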

static const char pkt_data[] =
	"\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
	"\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
	"\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
	"\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";

static void swap_mac_addresses(void *data)
{
	struct ether_header *eth = (struct ether_header *)data;
	struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
	struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
	struct ether_addr tmp;

	tmp = *src_addr;
	*src_addr = *dst_addr;
	*dst_addr = tmp;
}

static void hex_dump(void *pkt, size_t length, u64 addr)
{
	const unsigned char *address = (unsigned char *)pkt;
	const unsigned char *line = address;
	size_t line_size = 32;
	unsigned char c;
	char buf[32];
	int i = 0;

	if (!DEBUG_HEXDUMP)
		return;

	sprintf(buf, "addr=%llu", addr);
	printf("length = %zu\n", length);
	printf("%s | ", buf);
	while (length-- > 0) {
		printf("%02X ", *address++);
		if (!(++i % line_size) || (length == 0 && i % line_size)) {
			if (length == 0) {
				while (i++ % line_size)
					printf("__ ");
			}
			printf(" | ");	/* right close */
			while (line < address) {
				c = *line++;
				printf("%c", (c < 33 || c == 255) ? 0x2E : c);
			}
			printf("\n");
			if (length > 0)
				printf("%s | ", buf);
		}
	}
	printf("\n");
}

static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
{
	memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
	       sizeof(pkt_data) - 1);
	return sizeof(pkt_data) - 1;
}

static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
{
	struct xsk_umem_info *umem;
	struct xsk_umem_config cfg = {
		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.frame_size = opt_xsk_frame_size,
		.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
		.flags = opt_umem_flags
	};
	int ret;

	umem = calloc(1, sizeof(*umem));
	if (!umem)
		exit_with_error(errno);

	ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
			       &cfg);
	if (ret)
		exit_with_error(-ret);

	umem->buffer = buffer;
	return umem;
}

static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
{
	int ret, i;
	u32 idx;

	ret = xsk_ring_prod__reserve(&umem->fq,
				     XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
		exit_with_error(-ret);
	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
		*xsk_ring_prod__fill_addr(&umem->fq, idx++) =
			i * opt_xsk_frame_size;
	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
}
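
/*
 * One xsk_socket_info per socket. When more than one socket shares the UMEM
 * (-M), XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD keeps libbpf from attaching its
 * default XDP program, since this sample loads its own and steers packets
 * through the xsks_map set up further below.
 */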

static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
						    bool rx, bool tx)
{
	struct xsk_socket_config cfg;
	struct xsk_socket_info *xsk;
	struct xsk_ring_cons *rxr;
	struct xsk_ring_prod *txr;
	int ret;

	xsk = calloc(1, sizeof(*xsk));
	if (!xsk)
		exit_with_error(errno);

	xsk->umem = umem;
	cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
	if (opt_num_xsks > 1)
		cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
	else
		cfg.libbpf_flags = 0;
	cfg.xdp_flags = opt_xdp_flags;
	cfg.bind_flags = opt_xdp_bind_flags;

	rxr = rx ? &xsk->rx : NULL;
	txr = tx ? &xsk->tx : NULL;
	ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem,
				 rxr, txr, &cfg);
	if (ret)
		exit_with_error(-ret);

	ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
	if (ret)
		exit_with_error(-ret);

	return xsk;
}

static struct option long_options[] = {
	{"rxdrop", no_argument, 0, 'r'},
	{"txonly", no_argument, 0, 't'},
	{"l2fwd", no_argument, 0, 'l'},
	{"interface", required_argument, 0, 'i'},
	{"queue", required_argument, 0, 'q'},
	{"poll", no_argument, 0, 'p'},
	{"xdp-skb", no_argument, 0, 'S'},
	{"xdp-native", no_argument, 0, 'N'},
	{"interval", required_argument, 0, 'n'},
	{"zero-copy", no_argument, 0, 'z'},
	{"copy", no_argument, 0, 'c'},
	{"frame-size", required_argument, 0, 'f'},
	{"no-need-wakeup", no_argument, 0, 'm'},
	{"unaligned", no_argument, 0, 'u'},
	{"shared-umem", no_argument, 0, 'M'},
	{"force", no_argument, 0, 'F'},
	{0, 0, 0, 0}
};

static void usage(const char *prog)
{
	const char *str =
		"  Usage: %s [OPTIONS]\n"
		"  Options:\n"
		"  -r, --rxdrop		Discard all incoming packets (default)\n"
		"  -t, --txonly		Only send packets\n"
		"  -l, --l2fwd		MAC swap L2 forwarding\n"
		"  -i, --interface=n	Run on interface n\n"
		"  -q, --queue=n	Use queue n (default 0)\n"
		"  -p, --poll		Use poll syscall\n"
		"  -S, --xdp-skb=n	Use XDP skb-mod\n"
		"  -N, --xdp-native=n	Enforce XDP native mode\n"
		"  -n, --interval=n	Specify statistics update interval (default 1 sec).\n"
		"  -z, --zero-copy      Force zero-copy mode.\n"
		"  -c, --copy           Force copy mode.\n"
		"  -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
		"  -f, --frame-size=n   Set the frame size (must be a power of two in aligned mode, default is %d).\n"
		"  -u, --unaligned	Enable unaligned chunk placement\n"
		"  -M, --shared-umem	Enable XDP_SHARED_UMEM\n"
		"  -F, --force		Force loading the XDP prog\n"
		"\n";
	fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
	exit(EXIT_FAILURE);
}
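
/*
 * Example invocations (illustrative only; "eth0" and queue 0 are placeholders
 * for a locally available interface and queue):
 *
 *   ./xdpsock -i eth0 -q 0 -r	rxdrop in native XDP mode (the default)
 *   ./xdpsock -i eth0 -t -z	txonly, forcing a zero-copy bind
 *   ./xdpsock -i eth0 -l -S	l2fwd in SKB (copy) mode
 */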

static void parse_command_line(int argc, char **argv)
{
	int option_index, c;

	opterr = 0;

	for (;;) {
		c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:muM",
				long_options, &option_index);
		if (c == -1)
			break;

		switch (c) {
		case 'r':
			opt_bench = BENCH_RXDROP;
			break;
		case 't':
			opt_bench = BENCH_TXONLY;
			break;
		case 'l':
			opt_bench = BENCH_L2FWD;
			break;
		case 'i':
			opt_if = optarg;
			break;
		case 'q':
			opt_queue = atoi(optarg);
			break;
		case 'p':
			opt_poll = 1;
			break;
		case 'S':
			opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
			opt_xdp_bind_flags |= XDP_COPY;
			break;
		case 'N':
			/* default, set below */
			break;
		case 'n':
			opt_interval = atoi(optarg);
			break;
		case 'z':
			opt_xdp_bind_flags |= XDP_ZEROCOPY;
			break;
		case 'c':
			opt_xdp_bind_flags |= XDP_COPY;
			break;
		case 'u':
			opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
			opt_unaligned_chunks = 1;
			opt_mmap_flags = MAP_HUGETLB;
			break;
		case 'F':
			opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
			break;
		case 'f':
			opt_xsk_frame_size = atoi(optarg);
			break;
		case 'm':
			opt_need_wakeup = false;
			opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
			break;
		case 'M':
			opt_num_xsks = MAX_SOCKS;
			break;
		default:
			usage(basename(argv[0]));
		}
	}

	if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE))
		opt_xdp_flags |= XDP_FLAGS_DRV_MODE;

	opt_ifindex = if_nametoindex(opt_if);
	if (!opt_ifindex) {
		fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
			opt_if);
		usage(basename(argv[0]));
	}

	if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
	    !opt_unaligned_chunks) {
		fprintf(stderr, "--frame-size=%d is not a power of two\n",
			opt_xsk_frame_size);
		usage(basename(argv[0]));
	}
}
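
/*
 * With XDP_USE_NEED_WAKEUP the kernel only needs an explicit kick when it
 * flags the ring; a zero-length sendto() on the AF_XDP socket is that kick.
 * EBUSY/EAGAIN/ENOBUFS just mean the kernel is momentarily busy, so they are
 * not treated as fatal.
 */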

static void kick_tx(struct xsk_socket_info *xsk)
{
	int ret;

	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
		return;
	exit_with_error(errno);
}
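
/*
 * For l2fwd, completed TX descriptors are read from the completion ring and
 * their buffers are recycled straight into the fill ring so the NIC can
 * receive into them again; reserving fill ring slots may require a poll()
 * kick in need_wakeup mode.
 */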

static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
				     struct pollfd *fds)
{
	struct xsk_umem_info *umem = xsk->umem;
	u32 idx_cq = 0, idx_fq = 0;
	unsigned int rcvd;
	size_t ndescs;

	if (!xsk->outstanding_tx)
		return;

	if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
		kick_tx(xsk);

	ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
		xsk->outstanding_tx;

	/* re-add completed Tx buffers */
	rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq);
	if (rcvd > 0) {
		unsigned int i;
		int ret;

		ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
		while (ret != rcvd) {
			if (ret < 0)
				exit_with_error(-ret);
			if (xsk_ring_prod__needs_wakeup(&umem->fq))
				ret = poll(fds, num_socks, opt_timeout);
			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
		}

		for (i = 0; i < rcvd; i++)
			*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) =
				*xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);

		xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
		xsk->outstanding_tx -= rcvd;
		xsk->tx_npkts += rcvd;
	}
}

static inline void complete_tx_only(struct xsk_socket_info *xsk)
{
	unsigned int rcvd;
	u32 idx;

	if (!xsk->outstanding_tx)
		return;

	if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
		kick_tx(xsk);

	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
	if (rcvd > 0) {
		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
		xsk->outstanding_tx -= rcvd;
		xsk->tx_npkts += rcvd;
	}
}
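
/*
 * rxdrop path: peek up to BATCH_SIZE RX descriptors, reserve the same number
 * of fill ring slots, optionally hex-dump each packet, and hand every buffer
 * straight back to the fill ring without touching the payload.
 */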

static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
{
	unsigned int rcvd, i;
	u32 idx_rx = 0, idx_fq = 0;
	int ret;

	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
	if (!rcvd) {
		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
			ret = poll(fds, num_socks, opt_timeout);
		return;
	}

	ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
	while (ret != rcvd) {
		if (ret < 0)
			exit_with_error(-ret);
		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
			ret = poll(fds, num_socks, opt_timeout);
		ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
	}

	for (i = 0; i < rcvd; i++) {
		u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
		u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
		u64 orig = xsk_umem__extract_addr(addr);

		addr = xsk_umem__add_offset_to_addr(addr);
		char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);

		hex_dump(pkt, len, addr);
		*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
	}

	xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
	xsk_ring_cons__release(&xsk->rx, rcvd);
	xsk->rx_npkts += rcvd;
}

static void rx_drop_all(void)
{
	struct pollfd fds[MAX_SOCKS] = {};
	int i, ret;

	for (i = 0; i < num_socks; i++) {
		fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
		fds[i].events = POLLIN;
	}

	for (;;) {
		if (opt_poll) {
			ret = poll(fds, num_socks, opt_timeout);
			if (ret <= 0)
				continue;
		}

		for (i = 0; i < num_socks; i++)
			rx_drop(xsks[i], fds);
	}
}
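
/*
 * txonly path: descriptor addresses are computed as
 * frame_index << XSK_UMEM__DEFAULT_FRAME_SHIFT, so this path assumes the
 * default frame size; every frame carries the canned pkt_data payload
 * written by gen_eth_frame() in main().
 */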

static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb)
{
	u32 idx;

	if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) {
		unsigned int i;

		for (i = 0; i < BATCH_SIZE; i++) {
			xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr =
				(*frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
			xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
				sizeof(pkt_data) - 1;
		}

		xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
		xsk->outstanding_tx += BATCH_SIZE;
		*frame_nb += BATCH_SIZE;
		*frame_nb %= NUM_FRAMES;
	}

	complete_tx_only(xsk);
}

static void tx_only_all(void)
{
	struct pollfd fds[MAX_SOCKS] = {};
	u32 frame_nb[MAX_SOCKS] = {};
	int i, ret;

	for (i = 0; i < num_socks; i++) {
		fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
		fds[i].events = POLLOUT;
	}

	for (;;) {
		if (opt_poll) {
			ret = poll(fds, num_socks, opt_timeout);
			if (ret <= 0)
				continue;

			if (!(fds[0].revents & POLLOUT))
				continue;
		}

		for (i = 0; i < num_socks; i++)
			tx_only(xsks[i], &frame_nb[i]);
	}
}

static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
{
	unsigned int rcvd, i;
	u32 idx_rx = 0, idx_tx = 0;
	int ret;

	complete_tx_l2fwd(xsk, fds);

	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
	if (!rcvd) {
		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
			ret = poll(fds, num_socks, opt_timeout);
		return;
	}

	ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
	while (ret != rcvd) {
		if (ret < 0)
			exit_with_error(-ret);
		if (xsk_ring_prod__needs_wakeup(&xsk->tx))
			kick_tx(xsk);
		ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
	}

	for (i = 0; i < rcvd; i++) {
		u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
		u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
		u64 orig = addr;

		addr = xsk_umem__add_offset_to_addr(addr);
		char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);

		swap_mac_addresses(pkt);

		hex_dump(pkt, len, addr);
		xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig;
		xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
	}

	xsk_ring_prod__submit(&xsk->tx, rcvd);
	xsk_ring_cons__release(&xsk->rx, rcvd);

	xsk->rx_npkts += rcvd;
	xsk->outstanding_tx += rcvd;
}

static void l2fwd_all(void)
{
	struct pollfd fds[MAX_SOCKS] = {};
	int i, ret;

	for (i = 0; i < num_socks; i++) {
		fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
		fds[i].events = POLLOUT | POLLIN;
	}

	for (;;) {
		if (opt_poll) {
			ret = poll(fds, num_socks, opt_timeout);
			if (ret <= 0)
				continue;
		}

		for (i = 0; i < num_socks; i++)
			l2fwd(xsks[i], fds);
	}
}
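
/*
 * Shared-UMEM mode (-M): the sample loads its companion xdpsock_kern.o XDP
 * program and writes each socket fd into its "xsks_map", letting the
 * kernel-side program spread incoming traffic across all sockets on the
 * queue.
 */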

static void load_xdp_program(char **argv, struct bpf_object **obj)
{
	struct bpf_prog_load_attr prog_load_attr = {
		.prog_type      = BPF_PROG_TYPE_XDP,
	};
	char xdp_filename[256];
	int prog_fd;

	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
	prog_load_attr.file = xdp_filename;

	if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd))
		exit(EXIT_FAILURE);
	if (prog_fd < 0) {
		fprintf(stderr, "ERROR: no program found: %s\n",
			strerror(prog_fd));
		exit(EXIT_FAILURE);
	}

	if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
		fprintf(stderr, "ERROR: link set xdp fd failed\n");
		exit(EXIT_FAILURE);
	}
}

static void enter_xsks_into_map(struct bpf_object *obj)
{
	struct bpf_map *map;
	int i, xsks_map;

	map = bpf_object__find_map_by_name(obj, "xsks_map");
	xsks_map = bpf_map__fd(map);
	if (xsks_map < 0) {
		fprintf(stderr, "ERROR: no xsks map found: %s\n",
			strerror(xsks_map));
		exit(EXIT_FAILURE);
	}

	for (i = 0; i < num_socks; i++) {
		int fd = xsk_socket__fd(xsks[i]->xsk);
		int key, ret;

		key = i;
		ret = bpf_map_update_elem(xsks_map, &key, &fd, 0);
		if (ret) {
			fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
			exit(EXIT_FAILURE);
		}
	}
}
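
/*
 * Setup order in main(): parse options, raise RLIMIT_MEMLOCK, mmap the UMEM
 * area (hugepage-backed in unaligned mode), register the UMEM and create the
 * socket(s), optionally pre-populate TX frames, then run the chosen benchmark
 * loop with a stats poller thread in the background.
 */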

int main(int argc, char **argv)
{
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
	bool rx = false, tx = false;
	struct xsk_umem_info *umem;
	struct bpf_object *obj;
	pthread_t pt;
	int i, ret;
	void *bufs;

	parse_command_line(argc, argv);

	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
			strerror(errno));
		exit(EXIT_FAILURE);
	}

	if (opt_num_xsks > 1)
		load_xdp_program(argv, &obj);

	/* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
	bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
		    PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0);
	if (bufs == MAP_FAILED) {
		printf("ERROR: mmap failed\n");
		exit(EXIT_FAILURE);
	}

	/* Create sockets... */
	umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
	if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) {
		rx = true;
		xsk_populate_fill_ring(umem);
	}
	if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY)
		tx = true;
	for (i = 0; i < opt_num_xsks; i++)
		xsks[num_socks++] = xsk_configure_socket(umem, rx, tx);

	if (opt_bench == BENCH_TXONLY)
		for (i = 0; i < NUM_FRAMES; i++)
			gen_eth_frame(umem, i * opt_xsk_frame_size);

	if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY)
		enter_xsks_into_map(obj);

	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);
	signal(SIGABRT, int_exit);

	setlocale(LC_ALL, "");

	ret = pthread_create(&pt, NULL, poller, NULL);
	if (ret)
		exit_with_error(ret);

	prev_time = get_nsecs();

	if (opt_bench == BENCH_RXDROP)
		rx_drop_all();
	else if (opt_bench == BENCH_TXONLY)
		tx_only_all();
	else
		l2fwd_all();

	return 0;
}