/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>

#include <net/ip.h>
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
#include <net/sctp/checksum.h>

#include "datapath.h"
#include "flow.h"
#include "vport.h"

static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len);

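/* Recirculation and sampled action lists are not executed recursively.
 * Instead they are queued in a per-CPU fifo and run after the current
 * action list finishes, which bounds the kernel stack usage.
 */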
struct deferred_action {
	struct sk_buff *skb;
	const struct nlattr *actions;

	/* Store pkt_key clone when creating deferred action. */
	struct sw_flow_key pkt_key;
};

#define DEFERRED_ACTION_FIFO_SIZE 10
struct action_fifo {
	int head;
	int tail;
	/* Deferred action fifo queue storage. */
	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};

static struct action_fifo __percpu *action_fifos;
static DEFINE_PER_CPU(int, exec_actions_level);

static void action_fifo_init(struct action_fifo *fifo)
{
	fifo->head = 0;
	fifo->tail = 0;
}

static bool action_fifo_is_empty(const struct action_fifo *fifo)
{
	return (fifo->head == fifo->tail);
}

static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
{
	if (action_fifo_is_empty(fifo))
		return NULL;

	return &fifo->fifo[fifo->tail++];
}

static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
{
	if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
		return NULL;

	return &fifo->fifo[fifo->head++];
}

/* Returns the fifo entry on success, or NULL if the fifo is full. */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
						    const struct sw_flow_key *key,
						    const struct nlattr *attr)
{
	struct action_fifo *fifo;
	struct deferred_action *da;

	fifo = this_cpu_ptr(action_fifos);
	da = action_fifo_put(fifo);
	if (da) {
		da->skb = skb;
		da->actions = attr;
		da->pkt_key = *key;
	}

	return da;
}

static void invalidate_flow_key(struct sw_flow_key *key)
{
	key->eth.type = htons(0);
}

static bool is_flow_key_valid(const struct sw_flow_key *key)
{
	return !!key->eth.type;
}

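/* Insert an MPLS label stack entry just after the Ethernet header and
 * rewrite the ethertype.  The flow key is invalidated because the packet
 * structure changes.
 */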
static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_mpls *mpls)
{
	__be32 *new_mpls_lse;
	struct ethhdr *hdr;

	/* The networking stack does not allow simultaneous tunnel and
	 * MPLS GSO.
	 */
	if (skb->encapsulation)
		return -ENOTSUPP;

	if (skb_cow_head(skb, MPLS_HLEN) < 0)
		return -ENOMEM;

	skb_push(skb, MPLS_HLEN);
	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);
	skb_reset_mac_header(skb);

	new_mpls_lse = (__be32 *)skb_mpls_header(skb);
	*new_mpls_lse = mpls->mpls_lse;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
							     MPLS_HLEN, 0));

	hdr = eth_hdr(skb);
	hdr->h_proto = mpls->mpls_ethertype;

	if (!skb->inner_protocol)
		skb_set_inner_protocol(skb, skb->protocol);
	skb->protocol = mpls->mpls_ethertype;

	invalidate_flow_key(key);
	return 0;
}

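/* Remove the outermost MPLS label stack entry and restore the given
 * ethertype.  The flow key is invalidated because the packet structure
 * changes.
 */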
static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		    const __be16 ethertype)
{
	struct ethhdr *hdr;
	int err;

	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN);

	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);

	__skb_pull(skb, MPLS_HLEN);
	skb_reset_mac_header(skb);

	/* skb_mpls_header() is used to locate the ethertype
	 * field correctly in the presence of VLAN tags.
	 */
	hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
	hdr->h_proto = ethertype;
	if (eth_p_mpls(skb->protocol))
		skb->protocol = ethertype;

	invalidate_flow_key(key);
	return 0;
}

/* 'KEY' must not have any bits set outside of the 'MASK' */
#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK))

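/* Rewrite the outermost MPLS label stack entry under 'mask', fixing up a
 * CHECKSUM_COMPLETE checksum for the changed 32-bit word.
 */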
static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const __be32 *mpls_lse, const __be32 *mask)
{
	__be32 *stack;
	__be32 lse;
	int err;

	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	stack = (__be32 *)skb_mpls_header(skb);
	lse = MASKED(*stack, *mpls_lse, *mask);
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		__be32 diff[] = { ~(*stack), lse };

		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
					  ~skb->csum);
	}

	*stack = lse;
	flow_key->mpls.top_lse = lse;
	return 0;
}

static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = skb_vlan_pop(skb);
	if (skb_vlan_tag_present(skb))
		invalidate_flow_key(key);
	else
		key->eth.tci = 0;
	return err;
}

static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_vlan *vlan)
{
	if (skb_vlan_tag_present(skb))
		invalidate_flow_key(key);
	else
		key->eth.tci = vlan->vlan_tci;
	return skb_vlan_push(skb, vlan->vlan_tpid,
			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
}

/* 'src' is already properly masked. */
static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
{
	u16 *dst = (u16 *)dst_;
	const u16 *src = (const u16 *)src_;
	const u16 *mask = (const u16 *)mask_;

	SET_MASKED(dst[0], src[0], mask[0]);
	SET_MASKED(dst[1], src[1], mask[1]);
	SET_MASKED(dst[2], src[2], mask[2]);
}

static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
			const struct ovs_key_ethernet *key,
			const struct ovs_key_ethernet *mask)
{
	int err;

	err = skb_ensure_writable(skb, ETH_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
			       mask->eth_src);
	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
			       mask->eth_dst);

	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
	return 0;
}

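/* Update the TCP or UDP checksum to account for an IPv4 address change.
 * Fragments past the first and UDP packets without a checksum are left
 * alone.
 */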
static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
				  __be32 addr, __be32 new_addr)
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (nh->frag_off & htons(IP_OFFSET))
		return;

	if (nh->protocol == IPPROTO_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
						 addr, new_addr, 1);
	} else if (nh->protocol == IPPROTO_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace4(&uh->check, skb,
							 addr, new_addr, 1);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	}
}

static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
			__be32 *addr, __be32 new_addr)
{
	update_ip_l4_checksum(skb, nh, *addr, new_addr);
	csum_replace4(&nh->check, *addr, new_addr);
	skb_clear_hash(skb);
	*addr = new_addr;
}

static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
				 __be32 addr[4], const __be32 new_addr[4])
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (l4_proto == NEXTHDR_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
						  addr, new_addr, 1);
	} else if (l4_proto == NEXTHDR_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace16(&uh->check, skb,
							  addr, new_addr, 1);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	} else if (l4_proto == NEXTHDR_ICMP) {
		if (likely(transport_len >= sizeof(struct icmp6hdr)))
			inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
						  skb, addr, new_addr, 1);
	}
}

static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
			   const __be32 mask[4], __be32 masked[4])
{
	masked[0] = MASKED(old[0], addr[0], mask[0]);
	masked[1] = MASKED(old[1], addr[1], mask[1]);
	masked[2] = MASKED(old[2], addr[2], mask[2]);
	masked[3] = MASKED(old[3], addr[3], mask[3]);
}

static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
			  __be32 addr[4], const __be32 new_addr[4],
			  bool recalculate_csum)
{
	if (recalculate_csum)
		update_ipv6_checksum(skb, l4_proto, addr, new_addr);

	skb_clear_hash(skb);
	memcpy(addr, new_addr, sizeof(__be32[4]));
}

static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
{
	/* Bits 21-24 are always unmasked, so this retains their values. */
	SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
	SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
	SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
}

static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
		       u8 mask)
{
	new_ttl = MASKED(nh->ttl, new_ttl, mask);

	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
	nh->ttl = new_ttl;
}

static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv4 *key,
		    const struct ovs_key_ipv4 *mask)
{
	struct iphdr *nh;
	__be32 new_addr;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct iphdr));
	if (unlikely(err))
		return err;

	nh = ip_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (mask->ipv4_src) {
		new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);

		if (unlikely(new_addr != nh->saddr)) {
			set_ip_addr(skb, nh, &nh->saddr, new_addr);
			flow_key->ipv4.addr.src = new_addr;
		}
	}
	if (mask->ipv4_dst) {
		new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);

		if (unlikely(new_addr != nh->daddr)) {
			set_ip_addr(skb, nh, &nh->daddr, new_addr);
			flow_key->ipv4.addr.dst = new_addr;
		}
	}
	if (mask->ipv4_tos) {
		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
		flow_key->ip.tos = nh->tos;
	}
	if (mask->ipv4_ttl) {
		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
		flow_key->ip.ttl = nh->ttl;
	}

	return 0;
}

static bool is_ipv6_mask_nonzero(const __be32 addr[4])
{
	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
}

static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv6 *key,
		    const struct ovs_key_ipv6 *mask)
{
	struct ipv6hdr *nh;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct ipv6hdr));
	if (unlikely(err))
		return err;

	nh = ipv6_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
		__be32 *saddr = (__be32 *)&nh->saddr;
		__be32 masked[4];

		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);

		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
			set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
				      true);
			memcpy(&flow_key->ipv6.addr.src, masked,
			       sizeof(flow_key->ipv6.addr.src));
		}
	}
	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
		unsigned int offset = 0;
		int flags = IP6_FH_F_SKIP_RH;
		bool recalc_csum = true;
		__be32 *daddr = (__be32 *)&nh->daddr;
		__be32 masked[4];

		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);

		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
			if (ipv6_ext_hdr(nh->nexthdr))
				recalc_csum = (ipv6_find_hdr(skb, &offset,
							     NEXTHDR_ROUTING,
							     NULL, &flags)
					       != NEXTHDR_ROUTING);

			set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
				      recalc_csum);
			memcpy(&flow_key->ipv6.addr.dst, masked,
			       sizeof(flow_key->ipv6.addr.dst));
		}
	}
	if (mask->ipv6_tclass) {
		ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
		flow_key->ip.tos = ipv6_get_dsfield(nh);
	}
	if (mask->ipv6_label) {
		set_ipv6_fl(nh, ntohl(key->ipv6_label),
			    ntohl(mask->ipv6_label));
		flow_key->ipv6.label =
		    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
	}
	if (mask->ipv6_hlimit) {
		SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
		flow_key->ip.ttl = nh->hop_limit;
	}
	return 0;
}

/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
			__be16 new_port, __sum16 *check)
{
	inet_proto_csum_replace2(check, skb, *port, new_port, 0);
	*port = new_port;
}

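/* Apply a masked set of the UDP ports.  The checksum is only adjusted when
 * the packet carries one; a recomputed checksum of zero is folded to
 * CSUM_MANGLED_0.
 */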
static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_udp *key,
		   const struct ovs_key_udp *mask)
{
	struct udphdr *uh;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct udphdr));
	if (unlikely(err))
		return err;

	uh = udp_hdr(skb);
	/* Either of the masks is non-zero, so do not bother checking them. */
	src = MASKED(uh->source, key->udp_src, mask->udp_src);
	dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);

	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
		if (likely(src != uh->source)) {
			set_tp_port(skb, &uh->source, src, &uh->check);
			flow_key->tp.src = src;
		}
		if (likely(dst != uh->dest)) {
			set_tp_port(skb, &uh->dest, dst, &uh->check);
			flow_key->tp.dst = dst;
		}

		if (unlikely(!uh->check))
			uh->check = CSUM_MANGLED_0;
	} else {
		uh->source = src;
		uh->dest = dst;
		flow_key->tp.src = src;
		flow_key->tp.dst = dst;
	}

	skb_clear_hash(skb);

	return 0;
}

static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_tcp *key,
		   const struct ovs_key_tcp *mask)
{
	struct tcphdr *th;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct tcphdr));
	if (unlikely(err))
		return err;

	th = tcp_hdr(skb);
	src = MASKED(th->source, key->tcp_src, mask->tcp_src);
	if (likely(src != th->source)) {
		set_tp_port(skb, &th->source, src, &th->check);
		flow_key->tp.src = src;
	}
	dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
	if (likely(dst != th->dest)) {
		set_tp_port(skb, &th->dest, dst, &th->check);
		flow_key->tp.dst = dst;
	}
	skb_clear_hash(skb);

	return 0;
}

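/* Apply a masked set of the SCTP ports, recomputing the CRC32c checksum
 * while preserving any pre-existing checksum error.
 */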
static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_sctp *key,
		    const struct ovs_key_sctp *mask)
{
	unsigned int sctphoff = skb_transport_offset(skb);
	struct sctphdr *sh;
	__le32 old_correct_csum, new_csum, old_csum;
	int err;

	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
	if (unlikely(err))
		return err;

	sh = sctp_hdr(skb);
	old_csum = sh->checksum;
	old_correct_csum = sctp_compute_cksum(skb, sctphoff);

	sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
	sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);

	new_csum = sctp_compute_cksum(skb, sctphoff);

	/* Carry any checksum errors through. */
	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

	skb_clear_hash(skb);
	flow_key->tp.src = sh->source;
	flow_key->tp.dst = sh->dest;

	return 0;
}

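/* Transmit 'skb' on the given output port, or free it if the port no
 * longer exists.
 */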
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
	struct vport *vport = ovs_vport_rcu(dp, out_port);

	if (likely(vport))
		ovs_vport_send(vport, skb);
	else
		kfree_skb(skb);
}

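/* Send the packet to userspace via an OVS_PACKET_CMD_ACTION upcall,
 * attaching any userdata, pid, egress tunnel info and actions requested
 * by the OVS_ACTION_ATTR_USERSPACE attributes.
 */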
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
			    struct sw_flow_key *key, const struct nlattr *attr,
			    const struct nlattr *actions, int actions_len)
{
	struct ip_tunnel_info info;
	struct dp_upcall_info upcall;
	const struct nlattr *a;
	int rem;

	memset(&upcall, 0, sizeof(upcall));
	upcall.cmd = OVS_PACKET_CMD_ACTION;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
	     a = nla_next(a, &rem)) {
		switch (nla_type(a)) {
		case OVS_USERSPACE_ATTR_USERDATA:
			upcall.userdata = a;
			break;

		case OVS_USERSPACE_ATTR_PID:
			upcall.portid = nla_get_u32(a);
			break;

		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
			/* Get out tunnel info. */
			struct vport *vport;

			vport = ovs_vport_rcu(dp, nla_get_u32(a));
			if (vport) {
				int err;

				err = ovs_vport_get_egress_tun_info(vport, skb,
								    &info);
				if (!err)
					upcall.egress_tun_info = &info;
			}
			break;
		}

		case OVS_USERSPACE_ATTR_ACTIONS: {
			/* Include actions. */
			upcall.actions = actions;
			upcall.actions_len = actions_len;
			break;
		}

		} /* End of switch. */
	}

	return ovs_dp_upcall(dp, skb, key, &upcall);
}

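/* Execute a sample action: with probability OVS_SAMPLE_ATTR_PROBABILITY out
 * of U32_MAX, run the nested action list on a clone of the packet.  The
 * common single-userspace-action case avoids the clone; other action lists
 * are deferred.
 */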
static int sample(struct datapath *dp, struct sk_buff *skb,
		  struct sw_flow_key *key, const struct nlattr *attr,
		  const struct nlattr *actions, int actions_len)
{
	const struct nlattr *acts_list = NULL;
	const struct nlattr *a;
	int rem;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
	     a = nla_next(a, &rem)) {
		u32 probability;

		switch (nla_type(a)) {
		case OVS_SAMPLE_ATTR_PROBABILITY:
			probability = nla_get_u32(a);
			if (!probability || prandom_u32() > probability)
				return 0;
			break;

		case OVS_SAMPLE_ATTR_ACTIONS:
			acts_list = a;
			break;
		}
	}

	rem = nla_len(acts_list);
	a = nla_data(acts_list);

	/* The actions list is empty; do nothing. */
	if (unlikely(!rem))
		return 0;

	/* The only known usage of sample action is having a single user-space
	 * action. Treat this usage as a special case.
	 * The output_userspace() should clone the skb to be sent to the
	 * user space. This skb will be consumed by its caller.
	 */
	if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
		   nla_is_last(a, rem)))
		return output_userspace(dp, skb, key, a, actions, actions_len);

	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		/* Skip the sample action when out of memory. */
		return 0;

	if (!add_deferred_actions(skb, key, a)) {
		if (net_ratelimit())
			pr_warn("%s: deferred actions limit reached, dropping sample action\n",
				ovs_dp_name(dp));

		kfree_skb(skb);
	}
	return 0;
}

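/* Compute an L4 flow hash over the packet, mix in the basis from the
 * action and store the (never-zero) result in the flow key.
 */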
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
			 const struct nlattr *attr)
{
	struct ovs_action_hash *hash_act = nla_data(attr);
	u32 hash = 0;

	/* OVS_HASH_ALG_L4 is the only possible hash algorithm.  */
	hash = skb_get_hash(skb);
	hash = jhash_1word(hash, hash_act->hash_basis);
	if (!hash)
		hash = 0x1;

	key->ovs_flow_hash = hash;
}

static int execute_set_action(struct sk_buff *skb,
			      struct sw_flow_key *flow_key,
			      const struct nlattr *a)
{
	/* Only tunnel set execution is supported without a mask. */
	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
		struct ovs_tunnel_info *tun = nla_data(a);

		skb_dst_drop(skb);
		dst_hold((struct dst_entry *)tun->tun_dst);
		skb_dst_set(skb, (struct dst_entry *)tun->tun_dst);

		/* FIXME: Remove when all vports have been converted */
		OVS_CB(skb)->egress_tun_info = &tun->tun_dst->u.tun_info;

		return 0;
	}

	return -EINVAL;
}

/* Mask is at the midpoint of the data. */
#define get_mask(a, type) ((const type)nla_data(a) + 1)

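/* Dispatch a masked set-field action to the appropriate helper.  The
 * attribute payload holds the value followed by the mask, as located by
 * get_mask().
 */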
static int execute_masked_set_action(struct sk_buff *skb,
				     struct sw_flow_key *flow_key,
				     const struct nlattr *a)
{
	int err = 0;

	switch (nla_type(a)) {
	case OVS_KEY_ATTR_PRIORITY:
		SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *));
		flow_key->phy.priority = skb->priority;
		break;

	case OVS_KEY_ATTR_SKB_MARK:
		SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
		flow_key->phy.skb_mark = skb->mark;
		break;

	case OVS_KEY_ATTR_TUNNEL_INFO:
		/* Masked data not supported for tunnel. */
		err = -EINVAL;
		break;

	case OVS_KEY_ATTR_ETHERNET:
		err = set_eth_addr(skb, flow_key, nla_data(a),
				   get_mask(a, struct ovs_key_ethernet *));
		break;

	case OVS_KEY_ATTR_IPV4:
		err = set_ipv4(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv4 *));
		break;

	case OVS_KEY_ATTR_IPV6:
		err = set_ipv6(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv6 *));
		break;

	case OVS_KEY_ATTR_TCP:
		err = set_tcp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_tcp *));
		break;

	case OVS_KEY_ATTR_UDP:
		err = set_udp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_udp *));
		break;

	case OVS_KEY_ATTR_SCTP:
		err = set_sctp(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_sctp *));
		break;

	case OVS_KEY_ATTR_MPLS:
		err = set_mpls(skb, flow_key, nla_data(a),
			       get_mask(a, __be32 *));
		break;
	}

	return err;
}

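/* Queue the packet for another pass through the flow table with the new
 * recirc_id.  The flow key must be up to date before it is copied into the
 * deferred entry.
 */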
static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
			  struct sw_flow_key *key,
			  const struct nlattr *a, int rem)
{
	struct deferred_action *da;

	if (!is_flow_key_valid(key)) {
		int err;

		err = ovs_flow_key_update(skb, key);
		if (err)
			return err;
	}
	BUG_ON(!is_flow_key_valid(key));

	if (!nla_is_last(a, rem)) {
		/* The recirc action is not the last action
		 * of the action list, so we need to clone the skb.
		 */
		skb = skb_clone(skb, GFP_ATOMIC);

		/* Skip the recirc action when out of memory, but
		 * continue on with the rest of the action list.
		 */
		if (!skb)
			return 0;
	}

	da = add_deferred_actions(skb, key, NULL);
	if (da) {
		da->pkt_key.recirc_id = nla_get_u32(a);
	} else {
		kfree_skb(skb);

		if (net_ratelimit())
			pr_warn("%s: deferred action limit reached, drop recirc action\n",
				ovs_dp_name(dp));
	}

	return 0;
}

/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len)
{
	/* Every output action needs a separate clone of 'skb', but the common
	 * case is just a single output action, so that doing a clone and
	 * then freeing the original skbuff is wasteful.  So the following code
	 * is slightly obscure just to avoid that.
	 */
	int prev_port = -1;
	const struct nlattr *a;
	int rem;

	for (a = attr, rem = len; rem > 0;
	     a = nla_next(a, &rem)) {
		int err = 0;

		if (unlikely(prev_port != -1)) {
			struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);

			if (out_skb)
				do_output(dp, out_skb, prev_port);

			prev_port = -1;
		}

		switch (nla_type(a)) {
		case OVS_ACTION_ATTR_OUTPUT:
			prev_port = nla_get_u32(a);
			break;

		case OVS_ACTION_ATTR_USERSPACE:
			output_userspace(dp, skb, key, a, attr, len);
			break;

		case OVS_ACTION_ATTR_HASH:
			execute_hash(skb, key, a);
			break;

		case OVS_ACTION_ATTR_PUSH_MPLS:
			err = push_mpls(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_MPLS:
			err = pop_mpls(skb, key, nla_get_be16(a));
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			err = push_vlan(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_VLAN:
			err = pop_vlan(skb, key);
			break;

		case OVS_ACTION_ATTR_RECIRC:
			err = execute_recirc(dp, skb, key, a, rem);
			if (nla_is_last(a, rem)) {
				/* If this is the last action, the skb has
				 * been consumed or freed.
				 * Return immediately.
				 */
				return err;
			}
			break;

		case OVS_ACTION_ATTR_SET:
			err = execute_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SET_MASKED:
		case OVS_ACTION_ATTR_SET_TO_MASKED:
			err = execute_masked_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SAMPLE:
			err = sample(dp, skb, key, a, attr, len);
			break;
		}

		if (unlikely(err)) {
			kfree_skb(skb);
			return err;
		}
	}

	if (prev_port != -1)
		do_output(dp, skb, prev_port);
	else
		consume_skb(skb);

	return 0;
}

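/* Drain this CPU's deferred-action fifo, executing queued action lists or,
 * for recirculation entries, re-running flow table lookup on the packet.
 */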
static void process_deferred_actions(struct datapath *dp)
{
	struct action_fifo *fifo = this_cpu_ptr(action_fifos);

	/* Do not touch the FIFO if there are no deferred actions. */
	if (action_fifo_is_empty(fifo))
		return;

	/* Finish executing all deferred actions. */
	do {
		struct deferred_action *da = action_fifo_get(fifo);
		struct sk_buff *skb = da->skb;
		struct sw_flow_key *key = &da->pkt_key;
		const struct nlattr *actions = da->actions;

		if (actions)
			do_execute_actions(dp, skb, key, actions,
					   nla_len(actions));
		else
			ovs_dp_process_packet(skb, key);
	} while (!action_fifo_is_empty(fifo));

	/* Reset FIFO for the next packet.  */
	action_fifo_init(fifo);
}

/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
			const struct sw_flow_actions *acts,
			struct sw_flow_key *key)
{
	int level = this_cpu_read(exec_actions_level);
	int err;

	this_cpu_inc(exec_actions_level);
	OVS_CB(skb)->egress_tun_info = NULL;
	err = do_execute_actions(dp, skb, key,
				 acts->actions, acts->actions_len);

	if (!level)
		process_deferred_actions(dp);

	this_cpu_dec(exec_actions_level);
	return err;
}

int action_fifos_init(void)
{
	action_fifos = alloc_percpu(struct action_fifo);
	if (!action_fifos)
		return -ENOMEM;

	return 0;
}

void action_fifos_exit(void)
{
	free_percpu(action_fifos);
}