psock_fanout.c 10.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright 2013 Google Inc.
 * Author: Willem de Bruijn (willemb@google.com)
 *
 * A basic test of packet socket fanout behavior.
 *
 * Control:
 * - create fanout fails as expected with illegal flag combinations
 * - join   fanout fails as expected with diverging types or flags
 *
 * Datapath:
 *   Open a pair of packet sockets and a pair of INET sockets, send a known
 *   number of packets across the two INET sockets and count the number of
 *   packets enqueued onto the two packet sockets.
 *
 *   The test currently runs for
 *   - PACKET_FANOUT_HASH
 *   - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER
 *   - PACKET_FANOUT_LB
 *   - PACKET_FANOUT_CPU
 *   - PACKET_FANOUT_ROLLOVER
 *   - PACKET_FANOUT_CBPF
 *   - PACKET_FANOUT_EBPF
 *
 * Todo:
 * - functionality: PACKET_FANOUT_FLAG_DEFRAG
 *
 * License (GPLv2):
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

44 45
#define _GNU_SOURCE		/* for sched_setaffinity */

46 47
#include <arpa/inet.h>
#include <errno.h>
48
#include <fcntl.h>
49
#include <linux/unistd.h>	/* for __NR_bpf */
50
#include <linux/filter.h>
51
#include <linux/bpf.h>
52 53 54 55
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
56 57
#include <poll.h>
#include <sched.h>
58 59 60 61
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
62
#include <sys/mman.h>
63 64 65 66 67
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

68
#include "psock_lib.h"
69

70
#define RING_NUM_FRAMES			20
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93

/* Open a packet socket and ask to join fanout group 0 in the given mode.
 *
 * @param typeflags    fanout mode, optionally OR-ed with PACKET_FANOUT_FLAG_*
 *                     bits; it is placed in the upper 16 bits of the
 *                     PACKET_FANOUT option value
 * @param num_packets  unused; kept so that existing callers stay valid
 * @return -1 if the PACKET_FANOUT request is rejected (the socket is closed
 *         again first), a valid socket otherwise
 *
 * Exits the process if the socket cannot be created or cannot be closed.
 */
static int sock_fanout_open(uint16_t typeflags, int num_packets)
{
	unsigned int val;
	int fd;

	(void) num_packets;

	fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
	if (fd < 0) {
		perror("socket packet");
		exit(1);
	}

	/* fanout group ID is always 0: tests whether old groups are deleted.
	 *
	 * Shift as unsigned: a flag in bit 15 of typeflags would otherwise be
	 * moved into the sign bit of an int, which is undefined behaviour for
	 * a signed left shift.
	 */
	val = ((unsigned int) typeflags) << 16;
	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
		/* Rejection is an expected outcome for the control tests */
		if (close(fd)) {
			perror("close packet");
			exit(1);
		}
		return -1;
	}

	/* NOTE(review): helper from psock_lib.h; presumably restricts the
	 * socket to the test's own UDP traffic - confirm there.
	 */
	pair_udp_setfilter(fd);
	return fd;
}

98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
/* Load a small eBPF socket-filter program and attach it to @fd through the
 * PACKET_FANOUT_DATA socket option.
 *
 * The program descriptor is closed again once it has been attached.
 * Exits the process if loading, attaching or closing fails; on a load
 * failure the verifier log is printed as well.
 */
static void sock_fanout_set_ebpf(int fd)
{
	const int len_off = __builtin_offsetof(struct __sk_buff, len);
	/* Entries are { code, dst_reg, src_reg, off, imm } */
	struct bpf_insn prog[] = {
		/* r6 = r1 (the program context) */
		{ BPF_ALU64 | BPF_MOV | BPF_X,   6, 1, 0, 0 },
		/* r0 = context->len */
		{ BPF_LDX   | BPF_W   | BPF_MEM, 0, 6, len_off, 0 },
		/* if r0 >= DATA_LEN, skip the next instruction */
		{ BPF_JMP   | BPF_JGE | BPF_K,   0, 0, 1, DATA_LEN },
		/* too short: jump to exit, r0 still holds the length */
		{ BPF_JMP   | BPF_JA  | BPF_K,   0, 0, 4, 0 },
		/* r0 = packet byte at offset 0x50 */
		{ BPF_LD    | BPF_B   | BPF_ABS, 0, 0, 0, 0x50 },
		/* a known payload character: exit with it in r0 */
		{ BPF_JMP   | BPF_JEQ | BPF_K,   0, 0, 2, DATA_CHAR },
		{ BPF_JMP   | BPF_JEQ | BPF_K,   0, 0, 1, DATA_CHAR_1 },
		/* anything else: r0 = 0 */
		{ BPF_ALU   | BPF_MOV | BPF_K,   0, 0, 0, 0 },
		{ BPF_JMP   | BPF_EXIT,          0, 0, 0, 0 }
	};
	/* Start empty so the error path never prints uninitialised stack
	 * when the load fails before any log text has been written.
	 */
	char log_buf[512] = "";
	union bpf_attr attr;
	int pfd;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.insns = (unsigned long) prog;
	attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
	attr.license = (unsigned long) "GPL";
	attr.log_buf = (unsigned long) log_buf;
	attr.log_size = sizeof(log_buf);
	attr.log_level = 1;

	pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
	if (pfd < 0) {
		perror("bpf");
		fprintf(stderr, "bpf verifier:\n%s\n", log_buf);
		exit(1);
	}

	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
		perror("fanout data ebpf");
		exit(1);
	}

	if (close(pfd)) {
		perror("close ebpf");
		exit(1);
	}
}

143
static char *sock_fanout_open_ring(int fd)
144
{
145 146 147 148 149 150 151
	struct tpacket_req req = {
		.tp_block_size = getpagesize(),
		.tp_frame_size = getpagesize(),
		.tp_block_nr   = RING_NUM_FRAMES,
		.tp_frame_nr   = RING_NUM_FRAMES,
	};
	char *ring;
152
	int val = TPACKET_V2;
153

154 155 156 157 158
	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val,
		       sizeof(val))) {
		perror("packetsock ring setsockopt version");
		exit(1);
	}
159 160 161
	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req,
		       sizeof(req))) {
		perror("packetsock ring setsockopt");
162 163
		exit(1);
	}
164 165 166

	ring = mmap(0, req.tp_block_size * req.tp_block_nr,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
167 168
	if (ring == MAP_FAILED) {
		perror("packetsock ring mmap");
169 170
		exit(1);
	}
171 172 173 174 175 176

	return ring;
}

static int sock_fanout_read_ring(int fd, void *ring)
{
177
	struct tpacket2_hdr *header = ring;
178 179
	int count = 0;

180
	while (count < RING_NUM_FRAMES && header->tp_status & TP_STATUS_USER) {
181 182 183 184 185 186 187 188 189 190 191 192 193
		count++;
		header = ring + (count * getpagesize());
	}

	return count;
}

/* Compare the number of queued frames on both rings with @expect.
 *
 * The two counts may match @expect in either order.
 * The observed and expected counts are always logged to stderr.
 *
 * @return 0 on a match, 1 otherwise
 */
static int sock_fanout_read(int fds[], char *rings[], const int expect[])
{
	int got[2];
	int i;

	for (i = 0; i < 2; i++)
		got[i] = sock_fanout_read_ring(fds[i], rings[i]);

	fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
		got[0], got[1], expect[0], expect[1]);

	/* Either socket may be the one that received the larger share */
	if ((got[0] == expect[0] && got[1] == expect[1]) ||
	    (got[0] == expect[1] && got[1] == expect[0]))
		return 0;

	fprintf(stderr, "ERROR: incorrect queue lengths\n");
	return 1;
}

/* Control test: opening a socket with the rollover mode combined with the
 * rollover flag must be refused. Exits the process if it is accepted.
 */
static void test_control_single(void)
{
	int fd;

	fprintf(stderr, "test: control single socket\n");

	fd = sock_fanout_open(PACKET_FANOUT_ROLLOVER |
			      PACKET_FANOUT_FLAG_ROLLOVER, 0);
	if (fd == -1)
		return;

	fprintf(stderr, "ERROR: opened socket with dual rollover\n");
	exit(1);
}

/* Control test: once a HASH group exists, joining it with different flags
 * or a different mode must be refused, while joining with the identical
 * mode must succeed. Exits the process on the first unexpected outcome.
 */
static void test_control_group(void)
{
	/* Join attempts that must be rejected, in the order they are tried */
	static const struct {
		uint16_t typeflags;
		const char *errmsg;
	} must_fail[] = {
		{ PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG,
		  "ERROR: joined group with wrong flag defrag\n" },
		{ PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
		  "ERROR: joined group with wrong flag ro\n" },
		{ PACKET_FANOUT_CPU,
		  "ERROR: joined group with wrong mode\n" },
	};
	int first, second;
	size_t i;

	fprintf(stderr, "test: control multiple sockets\n");

	first = sock_fanout_open(PACKET_FANOUT_HASH, 20);
	if (first == -1) {
		fprintf(stderr, "ERROR: failed to open HASH socket\n");
		exit(1);
	}

	for (i = 0; i < sizeof(must_fail) / sizeof(must_fail[0]); i++) {
		if (sock_fanout_open(must_fail[i].typeflags, 10) == -1)
			continue;
		fprintf(stderr, "%s", must_fail[i].errmsg);
		exit(1);
	}

	second = sock_fanout_open(PACKET_FANOUT_HASH, 20);
	if (second == -1) {
		fprintf(stderr, "ERROR: failed to join group\n");
		exit(1);
	}

	/* Close in reverse order of opening; stop at the first failure */
	if (close(second) != 0 || close(first) != 0) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}

256 257
static int test_datapath(uint16_t typeflags, int port_off,
			 const int expect1[], const int expect2[])
258 259
{
	const int expect0[] = { 0, 0 };
260
	char *rings[2];
261
	uint8_t type = typeflags & 0xFF;
262
	int fds[2], fds_udp[2][2], ret;
263 264 265 266 267 268 269 270 271

	fprintf(stderr, "test: datapath 0x%hx\n", typeflags);

	fds[0] = sock_fanout_open(typeflags, 20);
	fds[1] = sock_fanout_open(typeflags, 20);
	if (fds[0] == -1 || fds[1] == -1) {
		fprintf(stderr, "ERROR: failed open\n");
		exit(1);
	}
272 273
	if (type == PACKET_FANOUT_CBPF)
		sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA);
274 275
	else if (type == PACKET_FANOUT_EBPF)
		sock_fanout_set_ebpf(fds[0]);
276

277 278 279 280 281
	rings[0] = sock_fanout_open_ring(fds[0]);
	rings[1] = sock_fanout_open_ring(fds[1]);
	pair_udp_open(fds_udp[0], PORT_BASE);
	pair_udp_open(fds_udp[1], PORT_BASE + port_off);
	sock_fanout_read(fds, rings, expect0);
282 283 284

	/* Send data, but not enough to overflow a queue */
	pair_udp_send(fds_udp[0], 15);
285
	pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1);
286
	ret = sock_fanout_read(fds, rings, expect1);
287 288

	/* Send more data, overflow the queue */
289
	pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1);
290
	/* TODO: ensure consistent order between expect1 and expect2 */
291
	ret |= sock_fanout_read(fds, rings, expect2);
292

293 294 295 296 297
	if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) ||
	    munmap(rings[0], RING_NUM_FRAMES * getpagesize())) {
		fprintf(stderr, "close rings\n");
		exit(1);
	}
298 299 300 301 302 303
	if (close(fds_udp[1][1]) || close(fds_udp[1][0]) ||
	    close(fds_udp[0][1]) || close(fds_udp[0][0]) ||
	    close(fds[1]) || close(fds[0])) {
		fprintf(stderr, "close datapath\n");
		exit(1);
	}
304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322

	return ret;
}

/* Pin the calling process to the single CPU @cpuid.
 *
 * @return 0 on success, 1 if sched_setaffinity() fails with EINVAL
 *
 * Any other failure is reported on stderr and exits the process.
 */
static int set_cpuaffinity(int cpuid)
{
	cpu_set_t cpus;

	CPU_ZERO(&cpus);
	CPU_SET(cpuid, &cpus);

	if (sched_setaffinity(0, sizeof(cpus), &cpus) == 0)
		return 0;

	/* EINVAL is reported to the caller instead of being fatal */
	if (errno == EINVAL)
		return 1;

	fprintf(stderr, "setaffinity %d\n", cpuid);
	exit(1);
}

/* Run the control tests, then the datapath test for every fanout mode.
 *
 * Each expect_* table holds two rows: the ring counts expected after the
 * first round of packets and after the second round.
 *
 * @return 0 if every datapath test passed, 1 otherwise
 */
int main(int argc, char **argv)
{
	const int expect_hash[2][2]	= { { 15, 5 },  { 20, 5 } };
	const int expect_hash_rb[2][2]	= { { 15, 5 },  { 20, 15 } };
	const int expect_lb[2][2]	= { { 10, 10 }, { 18, 17 } };
	const int expect_rb[2][2]	= { { 15, 5 },  { 20, 15 } };
	const int expect_cpu0[2][2]	= { { 20, 0 },  { 20, 0 } };
	const int expect_cpu1[2][2]	= { { 0, 20 },  { 0, 20 } };
	const int expect_bpf[2][2]	= { { 15, 5 },  { 15, 20 } };
	/* Modes that run once, in this order, after the HASH port search */
	const struct {
		uint16_t typeflags;
		const int (*expect)[2];
	} modes[] = {
		{ PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
		  expect_hash_rb },
		{ PACKET_FANOUT_LB,		expect_lb },
		{ PACKET_FANOUT_ROLLOVER,	expect_rb },
		{ PACKET_FANOUT_CBPF,		expect_bpf },
		{ PACKET_FANOUT_EBPF,		expect_bpf },
	};
	int port_off = 2, tries = 5, ret;
	size_t i;

	test_control_single();
	test_control_group();

	/* find a set of ports that do not collide onto the same socket */
	ret = test_datapath(PACKET_FANOUT_HASH, port_off,
			    expect_hash[0], expect_hash[1]);
	while (ret != 0 && tries-- > 0) {
		fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
		port_off++;
		ret = test_datapath(PACKET_FANOUT_HASH, port_off,
				    expect_hash[0], expect_hash[1]);
	}

	for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++)
		ret |= test_datapath(modes[i].typeflags, port_off,
				     modes[i].expect[0], modes[i].expect[1]);

	/* The result of pinning to CPU 0 is not checked */
	set_cpuaffinity(0);
	ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
			     expect_cpu0[0], expect_cpu0[1]);
	/* The CPU 1 run only happens if pinning to CPU 1 succeeded */
	if (set_cpuaffinity(1) == 0) {
		/* TODO: test that choice alternates with previous */
		ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
				     expect_cpu1[0], expect_cpu1[1]);
	}

	if (ret != 0)
		return 1;

	printf("OK. All tests passed\n");
	return 0;
}