/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/select.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <stdbool.h>
#include <signal.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <time.h>

#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>

#include <linux/netlink.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
#include <libgen.h>

#include <getopt.h>

#include "../bpf/bpf_load.h"
#include "../bpf/bpf_util.h"
#include "../bpf/libbpf.h"

/*
 * Run flag for the ping/pong loop: set to 1 by main() and cleared by
 * running_handler() on SIGINT.  Because it is written from a signal
 * handler it has to be a volatile sig_atomic_t rather than a plain int.
 */
volatile sig_atomic_t running;
void running_handler(int a);

/* randomly selected ports for testing on lo */
#define S1_PORT 10000
#define S2_PORT 10001

/*
 * global sockets: s1/s2 are the listening sockets, c1/c2 the connecting
 * clients and p1/p2 the accepted server-side ends (c1 <-> p1, c2 <-> p2).
 */
int s1, s2, c1, c2, p1, p2;

/*
 * Long command-line options.  Every entry has a NULL flag pointer, so
 * getopt_long() returns the short option character stored in .val.
 * The all-zero entry terminates the table.
 */
static const struct option long_options[] = {
	{ .name = "help",      .has_arg = no_argument,       .flag = NULL, .val = 'h' },
	{ .name = "cgroup",    .has_arg = required_argument, .flag = NULL, .val = 'c' },
	{ .name = "rate",      .has_arg = required_argument, .flag = NULL, .val = 'r' },
	{ .name = "verbose",   .has_arg = no_argument,       .flag = NULL, .val = 'v' },
	{ .name = "iov_count", .has_arg = required_argument, .flag = NULL, .val = 'i' },
	{ .name = "length",    .has_arg = required_argument, .flag = NULL, .val = 'l' },
	{ .name = "test",      .has_arg = required_argument, .flag = NULL, .val = 't' },
	{ .name = NULL,        .has_arg = 0,                 .flag = NULL, .val = 0 },
};

static void usage(char *argv[])
J
John Fastabend 已提交
69
{
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
	int i;

	printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
	printf(" options:\n");
	for (i = 0; long_options[i].name != 0; i++) {
		printf(" --%-12s", long_options[i].name);
		if (long_options[i].flag != NULL)
			printf(" flag (internal value:%d)\n",
				*long_options[i].flag);
		else
			printf(" -%c\n", long_options[i].val);
	}
	printf("\n");
}

/* Report a failed call via perror() and hand back the errno it failed with. */
static int sockmap_fail(const char *what)
{
	int saved = errno;	/* perror() itself may overwrite errno */

	perror(what);
	return saved;
}

/*
 * Create and connect the loopback test sockets.
 *
 * s1/s2 listen on 127.0.0.1:S1_PORT/S2_PORT, c1/c2 connect to them and
 * p1/p2 are the accepted server-side ends, giving the pairs c1 <-> p1 and
 * c2 <-> p2.  All six descriptors are stored in the file-scope globals.
 *
 * Returns 0 on success, otherwise the errno of the call that failed.  On
 * failure the sockets opened so far stay in the globals (unopened ones are
 * -1) so the caller can close them.
 */
static int sockmap_init_sockets(void)
{
	static const char * const names[4] = {"s1", "s2", "c1", "c2"};
	int *fds[4] = {&s1, &s2, &c1, &c2};
	struct sockaddr_in addr;
	int i, err, one = 1;
	char what[32];

	/* -1, not 0: closing an unopened slot must never hit stdin */
	s1 = s2 = p1 = p2 = c1 = c2 = -1;

	/* Init sockets */
	for (i = 0; i < 4; i++) {
		*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
		if (*fds[i] < 0) {
			snprintf(what, sizeof(what), "socket %s failed()",
				 names[i]);
			return sockmap_fail(what);
		}
	}

	/* Allow reuse of the listening addresses (s1, s2 only) */
	for (i = 0; i < 2; i++) {
		err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
				 &one, sizeof(one));
		if (err) {
			snprintf(what, sizeof(what), "setsockopt %s failed()",
				 names[i]);
			return sockmap_fail(what);
		}
	}

	/*
	 * Non-blocking listening sockets (s1, s2 only).  c1/c2 stay blocking,
	 * so connect() is expected to have completed before accept() runs.
	 */
	for (i = 0; i < 2; i++) {
		err = ioctl(*fds[i], FIONBIO, (char *)&one);
		if (err < 0) {
			snprintf(what, sizeof(what), "ioctl %s failed()",
				 names[i]);
			return sockmap_fail(what);
		}
	}

	/* Bind server sockets */
	memset(&addr, 0, sizeof(struct sockaddr_in));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = inet_addr("127.0.0.1");

	addr.sin_port = htons(S1_PORT);
	err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
	if (err < 0)
		return sockmap_fail("bind s1 failed()");

	addr.sin_port = htons(S2_PORT);
	err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
	if (err < 0)
		return sockmap_fail("bind s2 failed()");

	/* Listen server sockets */
	err = listen(s1, 32);
	if (err < 0)
		return sockmap_fail("listen s1 failed()");

	err = listen(s2, 32);
	if (err < 0)
		return sockmap_fail("listen s2 failed()");

	/* Initiate Connect; EINPROGRESS is tolerated, anything else is fatal */
	addr.sin_port = htons(S1_PORT);
	err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
	if (err < 0 && errno != EINPROGRESS)
		return sockmap_fail("connect c1 failed()");

	addr.sin_port = htons(S2_PORT);
	err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
	if (err < 0 && errno != EINPROGRESS)
		return sockmap_fail("connect c2 failed()");

	/* Accept Connections */
	p1 = accept(s1, NULL, NULL);
	if (p1 < 0)
		return sockmap_fail("accept s1 failed()");

	p2 = accept(s2, NULL, NULL);
	if (p2 < 0)
		return sockmap_fail("accept s2 failed()");

	printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
	printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
		c1, s1, c2, s2);
	return 0;
}

/* Byte counters and timestamps collected by one msg_loop() run. */
struct msg_stats {
	size_t bytes_sent;	/* sum of sendmsg() return values */
	size_t bytes_recvd;	/* sum of recvmsg() return values */
	struct timespec start;	/* CLOCK_MONOTONIC reading before the loop */
	struct timespec end;	/* CLOCK_MONOTONIC reading after the loop */
};

static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
		    struct msg_stats *s, bool tx)
{
	struct msghdr msg = {0};
202
	int err, i, flags = MSG_NOSIGNAL;
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
	struct iovec *iov;

	iov = calloc(iov_count, sizeof(struct iovec));
	if (!iov)
		return errno;

	for (i = 0; i < iov_count; i++) {
		char *d = calloc(iov_length, sizeof(char));

		if (!d) {
			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
			goto out_errno;
		}
		iov[i].iov_base = d;
		iov[i].iov_len = iov_length;
	}

	msg.msg_iov = iov;
	msg.msg_iovlen = iov_count;

	if (tx) {
224
		clock_gettime(CLOCK_MONOTONIC, &s->start);
225 226 227 228 229 230 231 232 233
		for (i = 0; i < cnt; i++) {
			int sent = sendmsg(fd, &msg, flags);

			if (sent < 0) {
				perror("send loop error:");
				goto out_errno;
			}
			s->bytes_sent += sent;
		}
234
		clock_gettime(CLOCK_MONOTONIC, &s->end);
235 236 237 238 239 240 241
	} else {
		int slct, recv, max_fd = fd;
		struct timeval timeout;
		float total_bytes;
		fd_set w;

		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
242 243 244
		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
		if (err < 0)
			perror("recv start time: ");
245 246 247 248 249 250 251 252 253 254 255
		while (s->bytes_recvd < total_bytes) {
			timeout.tv_sec = 1;
			timeout.tv_usec = 0;

			/* FD sets */
			FD_ZERO(&w);
			FD_SET(fd, &w);

			slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
			if (slct == -1) {
				perror("select()");
256
				clock_gettime(CLOCK_MONOTONIC, &s->end);
257 258 259 260
				goto out_errno;
			} else if (!slct) {
				fprintf(stderr, "unexpected timeout\n");
				errno = -EIO;
261
				clock_gettime(CLOCK_MONOTONIC, &s->end);
262 263 264 265 266 267
				goto out_errno;
			}

			recv = recvmsg(fd, &msg, flags);
			if (recv < 0) {
				if (errno != EWOULDBLOCK) {
268
					clock_gettime(CLOCK_MONOTONIC, &s->end);
269 270 271 272 273 274 275
					perror("recv failed()\n");
					goto out_errno;
				}
			}

			s->bytes_recvd += recv;
		}
276
		clock_gettime(CLOCK_MONOTONIC, &s->end);
277 278 279 280 281 282 283 284 285 286 287 288 289
	}

	for (i = 0; i < iov_count; i++)
		free(iov[i].iov_base);
	free(iov);
	return 0;
out_errno:
	for (i = 0; i < iov_count; i++)
		free(iov[i].iov_base);
	free(iov);
	return errno;
}

290 291 292 293 294 295 296 297 298 299 300 301
/* Divisor used to turn a bytes-per-second figure into GB/s (10^9). */
static float giga = 1e9f;

/*
 * Average transmit rate, in bytes per second, over the interval
 * [s.start, s.end].  The interval is measured with nanosecond resolution;
 * an empty or negative interval yields 0 instead of dividing by zero.
 */
static inline float sentBps(struct msg_stats s)
{
	double elapsed = (double)(s.end.tv_sec - s.start.tv_sec) +
			 (double)(s.end.tv_nsec - s.start.tv_nsec) / 1e9;

	if (elapsed <= 0)
		return 0;
	return (float)((double)s.bytes_sent / elapsed);
}

/*
 * Average receive rate, in bytes per second, over the interval
 * [s.start, s.end].  The interval is measured with nanosecond resolution;
 * an empty or negative interval yields 0 instead of dividing by zero.
 */
static inline float recvdBps(struct msg_stats s)
{
	double elapsed = (double)(s.end.tv_sec - s.start.tv_sec) +
			 (double)(s.end.tv_nsec - s.start.tv_nsec) / 1e9;

	if (elapsed <= 0)
		return 0;
	return (float)((double)s.bytes_recvd / elapsed);
}

static int sendmsg_test(int iov_count, int iov_buf, int cnt,
			int verbose, bool base)
304
{
305 306
	float sent_Bps = 0, recvd_Bps = 0;
	int rx_fd, txpid, rxpid, err = 0;
307
	struct msg_stats s = {0};
308 309 310 311
	int status;

	errno = 0;

312 313 314 315 316
	if (base)
		rx_fd = p1;
	else
		rx_fd = p2;

317 318
	rxpid = fork();
	if (rxpid == 0) {
319
		err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false);
320 321 322 323 324 325
		if (err)
			fprintf(stderr,
				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
				iov_count, iov_buf, cnt, err);
		shutdown(p2, SHUT_RDWR);
		shutdown(p1, SHUT_RDWR);
326 327 328 329 330 331 332 333
		if (s.end.tv_sec - s.start.tv_sec) {
			sent_Bps = sentBps(s);
			recvd_Bps = recvdBps(s);
		}
		fprintf(stdout,
			"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
			s.bytes_sent, sent_Bps, sent_Bps/giga,
			s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
334 335 336 337
		exit(1);
	} else if (rxpid == -1) {
		perror("msg_loop_rx: ");
		return errno;
338 339
	}

340 341 342 343 344 345 346 347
	txpid = fork();
	if (txpid == 0) {
		err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true);
		if (err)
			fprintf(stderr,
				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
				iov_count, iov_buf, cnt, err);
		shutdown(c1, SHUT_RDWR);
348 349 350 351 352 353 354 355
		if (s.end.tv_sec - s.start.tv_sec) {
			sent_Bps = sentBps(s);
			recvd_Bps = recvdBps(s);
		}
		fprintf(stdout,
			"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
			s.bytes_sent, sent_Bps, sent_Bps/giga,
			s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
356 357 358 359 360
		exit(1);
	} else if (txpid == -1) {
		perror("msg_loop_tx: ");
		return errno;
	}
361

362 363
	assert(waitpid(rxpid, &status, 0) == rxpid);
	assert(waitpid(txpid, &status, 0) == txpid);
364 365 366
	return err;
}

367 368 369 370 371 372 373 374
static int forever_ping_pong(int rate, int verbose)
{
	struct timeval timeout;
	char buf[1024] = {0};
	int sc;

	timeout.tv_sec = 10;
	timeout.tv_usec = 0;
J
John Fastabend 已提交
375 376 377 378 379

	/* Ping/Pong data from client to server */
	sc = send(c1, buf, sizeof(buf), 0);
	if (sc < 0) {
		perror("send failed()\n");
380
		return sc;
J
John Fastabend 已提交
381 382 383
	}

	do {
384 385
		int s, rc, i, max_fd = p2;
		fd_set w;
J
John Fastabend 已提交
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412

		/* FD sets */
		FD_ZERO(&w);
		FD_SET(c1, &w);
		FD_SET(c2, &w);
		FD_SET(p1, &w);
		FD_SET(p2, &w);

		s = select(max_fd + 1, &w, NULL, NULL, &timeout);
		if (s == -1) {
			perror("select()");
			break;
		} else if (!s) {
			fprintf(stderr, "unexpected timeout\n");
			break;
		}

		for (i = 0; i <= max_fd && s > 0; ++i) {
			if (!FD_ISSET(i, &w))
				continue;

			s--;

			rc = recv(i, buf, sizeof(buf), 0);
			if (rc < 0) {
				if (errno != EWOULDBLOCK) {
					perror("recv failed()\n");
413
					return rc;
J
John Fastabend 已提交
414 415 416 417 418 419 420 421 422 423 424
				}
			}

			if (rc == 0) {
				close(i);
				break;
			}

			sc = send(i, buf, rc, 0);
			if (sc < 0) {
				perror("send failed()\n");
425
				return sc;
J
John Fastabend 已提交
426 427
			}
		}
428 429 430 431 432

		if (rate)
			sleep(rate);

		if (verbose) {
J
John Fastabend 已提交
433 434 435 436 437 438
			printf(".");
			fflush(stdout);

		}
	} while (running);

439
	return 0;
J
John Fastabend 已提交
440 441
}

442 443 444
/* Test modes selected with --test (ping | sendmsg | base). */
enum {
	PING_PONG = 0,	/* "ping": echo data between the sockets */
	SENDMSG   = 1,	/* "sendmsg": sendmsg throughput test */
	BASE      = 2,	/* "base": sendmsg test, BPF setup skipped */
};

int main(int argc, char **argv)
{
450
	int iov_count = 1, length = 1024, rate = 1, verbose = 0;
J
John Fastabend 已提交
451
	struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
452
	int opt, longindex, err, cg_fd = 0;
453
	int test = PING_PONG;
J
John Fastabend 已提交
454 455
	char filename[256];

456
	while ((opt = getopt_long(argc, argv, "hvc:r:i:l:t:",
457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474
				  long_options, &longindex)) != -1) {
		switch (opt) {
		/* Cgroup configuration */
		case 'c':
			cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
			if (cg_fd < 0) {
				fprintf(stderr,
					"ERROR: (%i) open cg path failed: %s\n",
					cg_fd, optarg);
				return cg_fd;
			}
			break;
		case 'r':
			rate = atoi(optarg);
			break;
		case 'v':
			verbose = 1;
			break;
475 476 477 478 479 480 481 482 483 484 485
		case 'i':
			iov_count = atoi(optarg);
			break;
		case 'l':
			length = atoi(optarg);
			break;
		case 't':
			if (strcmp(optarg, "ping") == 0) {
				test = PING_PONG;
			} else if (strcmp(optarg, "sendmsg") == 0) {
				test = SENDMSG;
486 487
			} else if (strcmp(optarg, "base") == 0) {
				test = BASE;
488 489 490 491 492
			} else {
				usage(argv);
				return -1;
			}
			break;
493 494 495 496 497 498 499 500 501 502 503 504 505
		case 'h':
		default:
			usage(argv);
			return -1;
		}
	}

	if (!cg_fd) {
		fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
			argv[0]);
		return -1;
	}

J
John Fastabend 已提交
506 507 508 509 510
	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		perror("setrlimit(RLIMIT_MEMLOCK)");
		return 1;
	}

J
John Fastabend 已提交
511 512 513 514 515 516 517
	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);

	running = 1;

	/* catch SIGINT */
	signal(SIGINT, running_handler);

518 519 520 521
	/* If base test skip BPF setup */
	if (test == BASE)
		goto run;

J
John Fastabend 已提交
522 523 524 525 526 527 528
	if (load_bpf_file(filename)) {
		fprintf(stderr, "load_bpf_file: (%s) %s\n",
			filename, strerror(errno));
		return 1;
	}

	/* Attach programs to sockmap */
529 530 531 532 533 534 535 536 537 538
	err = bpf_prog_attach(prog_fd[0], map_fd[0],
				BPF_SK_SKB_STREAM_PARSER, 0);
	if (err) {
		fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
			err, strerror(errno));
		return err;
	}

	err = bpf_prog_attach(prog_fd[1], map_fd[0],
				BPF_SK_SKB_STREAM_VERDICT, 0);
J
John Fastabend 已提交
539 540 541 542 543 544 545 546 547 548 549 550 551 552
	if (err) {
		fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
			err, strerror(errno));
		return err;
	}

	/* Attach to cgroups */
	err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
	if (err) {
		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
			err, strerror(errno));
		return err;
	}

553
run:
554
	err = sockmap_init_sockets();
J
John Fastabend 已提交
555 556
	if (err) {
		fprintf(stderr, "ERROR: test socket failed: %d\n", err);
557
		goto out;
J
John Fastabend 已提交
558
	}
559

560 561 562
	if (test == PING_PONG)
		err = forever_ping_pong(rate, verbose);
	else if (test == SENDMSG)
563 564 565
		err = sendmsg_test(iov_count, length, rate, verbose, false);
	else if (test == BASE)
		err = sendmsg_test(iov_count, length, rate, verbose, true);
566 567
	else
		fprintf(stderr, "unknown test\n");
568 569 570 571 572 573 574 575 576
out:
	close(s1);
	close(s2);
	close(p1);
	close(p2);
	close(c1);
	close(c2);
	close(cg_fd);
	return err;
J
John Fastabend 已提交
577 578 579 580 581 582
}

/* SIGINT handler: clear the run flag so the ping/pong loop can finish. */
void running_handler(int a)
{
	(void)a;	/* the signal number is not needed */
	running = 0;
}