sockex3_kern.c 6.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
#include <uapi/linux/mpls.h>
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF

#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F

struct bpf_map_def SEC("maps") jmp_table = {
	.type = BPF_MAP_TYPE_PROG_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u32),
	.max_entries = 8,
};

#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4

/* protocol dispatch routine.
 * It tail-calls next BPF program depending on eth proto
 * Note, we could have used:
 * bpf_tail_call(skb, &jmp_table, proto);
 * but it would need large prog_array
 */
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
{
	switch (proto) {
	case ETH_P_8021Q:
	case ETH_P_8021AD:
		bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
		break;
	case ETH_P_MPLS_UC:
	case ETH_P_MPLS_MC:
		bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
		break;
	case ETH_P_IP:
		bpf_tail_call(skb, &jmp_table, PARSE_IP);
		break;
	case ETH_P_IPV6:
		bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
		break;
	}
}

struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

64
struct bpf_flow_keys {
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
	__be32 src;
	__be32 dst;
	union {
		__be32 ports;
		__be16 port16[2];
	};
	__u32 ip_proto;
};

static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
{
	return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
		& (IP_MF | IP_OFFSET);
}

static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
{
	__u64 w0 = load_word(ctx, off);
	__u64 w1 = load_word(ctx, off + 4);
	__u64 w2 = load_word(ctx, off + 8);
	__u64 w3 = load_word(ctx, off + 12);

	return (__u32)(w0 ^ w1 ^ w2 ^ w3);
}

struct globals {
91
	struct bpf_flow_keys flow;
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
};

struct bpf_map_def SEC("maps") percpu_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(struct globals),
	.max_entries = 32,
};

/* user poor man's per_cpu until native support is ready */
static struct globals *this_cpu_globals(void)
{
	u32 key = bpf_get_smp_processor_id();

	return bpf_map_lookup_elem(&percpu_map, &key);
}

/* some simple stats for user space consumption */
struct pair {
	__u64 packets;
	__u64 bytes;
};

struct bpf_map_def SEC("maps") hash_map = {
	.type = BPF_MAP_TYPE_HASH,
117
	.key_size = sizeof(struct bpf_flow_keys),
118 119 120 121 122 123
	.value_size = sizeof(struct pair),
	.max_entries = 1024,
};

static void update_stats(struct __sk_buff *skb, struct globals *g)
{
124
	struct bpf_flow_keys key = g->flow;
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
	struct pair *value;

	value = bpf_map_lookup_elem(&hash_map, &key);
	if (value) {
		__sync_fetch_and_add(&value->packets, 1);
		__sync_fetch_and_add(&value->bytes, skb->len);
	} else {
		struct pair val = {1, skb->len};

		bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
	}
}

static __always_inline void parse_ip_proto(struct __sk_buff *skb,
					   struct globals *g, __u32 ip_proto)
{
141
	__u32 nhoff = skb->cb[0];
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
	int poff;

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		};

		__u32 gre_flags = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, flags));
		__u32 gre_proto = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, proto));

		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
			break;

		nhoff += 4;
		if (gre_flags & GRE_CSUM)
			nhoff += 4;
		if (gre_flags & GRE_KEY)
			nhoff += 4;
		if (gre_flags & GRE_SEQ)
			nhoff += 4;

167
		skb->cb[0] = nhoff;
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
		parse_eth_proto(skb, gre_proto);
		break;
	}
	case IPPROTO_IPIP:
		parse_eth_proto(skb, ETH_P_IP);
		break;
	case IPPROTO_IPV6:
		parse_eth_proto(skb, ETH_P_IPV6);
		break;
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		g->flow.ports = load_word(skb, nhoff);
	case IPPROTO_ICMP:
		g->flow.ip_proto = ip_proto;
		update_stats(skb, g);
		break;
	default:
		break;
	}
}

PROG(PARSE_IP)(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 nhoff, verlen, ip_proto;

	if (!g)
		return 0;

197
	nhoff = skb->cb[0];
198 199 200 201 202 203 204 205 206 207 208 209 210 211

	if (unlikely(ip_is_fragment(skb, nhoff)))
		return 0;

	ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));

	if (ip_proto != IPPROTO_GRE) {
		g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
		g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
	}

	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
	nhoff += (verlen & 0xF) << 2;

212
	skb->cb[0] = nhoff;
213 214 215 216 217 218 219 220 221 222 223 224
	parse_ip_proto(skb, g, ip_proto);
	return 0;
}

PROG(PARSE_IPV6)(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 nhoff, ip_proto;

	if (!g)
		return 0;

225
	nhoff = skb->cb[0];
226 227 228 229 230 231 232 233 234

	ip_proto = load_byte(skb,
			     nhoff + offsetof(struct ipv6hdr, nexthdr));
	g->flow.src = ipv6_addr_hash(skb,
				     nhoff + offsetof(struct ipv6hdr, saddr));
	g->flow.dst = ipv6_addr_hash(skb,
				     nhoff + offsetof(struct ipv6hdr, daddr));
	nhoff += sizeof(struct ipv6hdr);

235
	skb->cb[0] = nhoff;
236 237 238 239 240 241 242 243
	parse_ip_proto(skb, g, ip_proto);
	return 0;
}

PROG(PARSE_VLAN)(struct __sk_buff *skb)
{
	__u32 nhoff, proto;

244
	nhoff = skb->cb[0];
245 246 247 248

	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
						h_vlan_encapsulated_proto));
	nhoff += sizeof(struct vlan_hdr);
249
	skb->cb[0] = nhoff;
250 251 252 253 254 255 256 257 258 259

	parse_eth_proto(skb, proto);

	return 0;
}

PROG(PARSE_MPLS)(struct __sk_buff *skb)
{
	__u32 nhoff, label;

260
	nhoff = skb->cb[0];
261 262 263

	label = load_word(skb, nhoff);
	nhoff += sizeof(struct mpls_label);
264
	skb->cb[0] = nhoff;
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284

	if (label & MPLS_LS_S_MASK) {
		__u8 verlen = load_byte(skb, nhoff);
		if ((verlen & 0xF0) == 4)
			parse_eth_proto(skb, ETH_P_IP);
		else
			parse_eth_proto(skb, ETH_P_IPV6);
	} else {
		parse_eth_proto(skb, ETH_P_MPLS_UC);
	}

	return 0;
}

SEC("socket/0")
int main_prog(struct __sk_buff *skb)
{
	__u32 nhoff = ETH_HLEN;
	__u32 proto = load_half(skb, 12);

285
	skb->cb[0] = nhoff;
286 287 288 289 290
	parse_eth_proto(skb, proto);
	return 0;
}

char _license[] SEC("license") = "GPL";