/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>

#include <net/ip.h>
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
#include <net/sctp/checksum.h>

#include "datapath.h"
#include "flow.h"
#include "vport.h"

static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len);

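/* Recirculation and nested sample actions are not executed recursively.
 * Instead they are queued on a per-CPU fifo and run once the current
 * action list has finished, which bounds kernel stack usage.
 */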
struct deferred_action {
	struct sk_buff *skb;
	const struct nlattr *actions;

	/* Store pkt_key clone when creating deferred action. */
	struct sw_flow_key pkt_key;
};

#define DEFERRED_ACTION_FIFO_SIZE 10
struct action_fifo {
	int head;
	int tail;
	/* Deferred action fifo queue storage. */
	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};

static struct action_fifo __percpu *action_fifos;
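/* Per-CPU nesting depth of ovs_execute_actions(); the deferred action
 * fifo is only drained by the outermost invocation (level zero).
 */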
static DEFINE_PER_CPU(int, exec_actions_level);

static void action_fifo_init(struct action_fifo *fifo)
{
	fifo->head = 0;
	fifo->tail = 0;
}

static bool action_fifo_is_empty(const struct action_fifo *fifo)
{
	return (fifo->head == fifo->tail);
}

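/* Dequeue the oldest deferred action, or return NULL if the fifo is empty. */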
static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
{
	if (action_fifo_is_empty(fifo))
		return NULL;

	return &fifo->fifo[fifo->tail++];
}

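/* Reserve the next free slot, or return NULL if the fifo is full. */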
static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
{
	if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
		return NULL;

	return &fifo->fifo[fifo->head++];
}

/* Queue a deferred action for later execution; returns NULL if the
 * per-CPU fifo is full, otherwise the queued entry.
 */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
						    const struct sw_flow_key *key,
						    const struct nlattr *attr)
{
	struct action_fifo *fifo;
	struct deferred_action *da;

	fifo = this_cpu_ptr(action_fifos);
	da = action_fifo_put(fifo);
	if (da) {
		da->skb = skb;
		da->actions = attr;
		da->pkt_key = *key;
	}

	return da;
}

static void invalidate_flow_key(struct sw_flow_key *key)
{
	key->eth.type = htons(0);
}

static bool is_flow_key_valid(const struct sw_flow_key *key)
{
	return !!key->eth.type;
}

static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_mpls *mpls)
{
	__be32 *new_mpls_lse;
	struct ethhdr *hdr;

	/* The networking stack does not allow simultaneous tunnel and MPLS GSO. */
	if (skb->encapsulation)
		return -ENOTSUPP;

	if (skb_cow_head(skb, MPLS_HLEN) < 0)
		return -ENOMEM;

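	/* Make room for the new MPLS label stack entry by moving the
	 * Ethernet header towards the front of the skb.
	 */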
	skb_push(skb, MPLS_HLEN);
	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);
	skb_reset_mac_header(skb);

	new_mpls_lse = (__be32 *)skb_mpls_header(skb);
	*new_mpls_lse = mpls->mpls_lse;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
							     MPLS_HLEN, 0));

	hdr = eth_hdr(skb);
	hdr->h_proto = mpls->mpls_ethertype;

	if (!skb->inner_protocol)
		skb_set_inner_protocol(skb, skb->protocol);
	skb->protocol = mpls->mpls_ethertype;

	invalidate_flow_key(key);
	return 0;
}

static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		    const __be16 ethertype)
{
	struct ethhdr *hdr;
	int err;

	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN);

	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);

	__skb_pull(skb, MPLS_HLEN);
	skb_reset_mac_header(skb);

	/* skb_mpls_header() is used to locate the ethertype
	 * field correctly in the presence of VLAN tags.
	 */
	hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
	hdr->h_proto = ethertype;
	if (eth_p_mpls(skb->protocol))
		skb->protocol = ethertype;

	invalidate_flow_key(key);
	return 0;
}

/* 'KEY' must not have any bits set outside of the 'MASK' */
#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK))
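/* Example: MASKED(0x1234, 0x00ff, 0x00ff) == 0x12ff -- only the bits
 * covered by MASK are taken from KEY; the remaining bits keep their
 * old value.
 */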

static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const __be32 *mpls_lse, const __be32 *mask)
{
	__be32 *stack;
	__be32 lse;
	int err;

	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	stack = (__be32 *)skb_mpls_header(skb);
	lse = MASKED(*stack, *mpls_lse, *mask);
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		__be32 diff[] = { ~(*stack), lse };

		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
					  ~skb->csum);
	}

	*stack = lse;
	flow_key->mpls.top_lse = lse;
	return 0;
}

static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = skb_vlan_pop(skb);
	if (skb_vlan_tag_present(skb))
		invalidate_flow_key(key);
	else
		key->eth.tci = 0;
	return err;
}

static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_vlan *vlan)
{
	if (skb_vlan_tag_present(skb))
		invalidate_flow_key(key);
	else
		key->eth.tci = vlan->vlan_tci;
	return skb_vlan_push(skb, vlan->vlan_tpid,
			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
}

/* 'src' is already properly masked. */
static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
{
	u16 *dst = (u16 *)dst_;
	const u16 *src = (const u16 *)src_;
	const u16 *mask = (const u16 *)mask_;

	SET_MASKED(dst[0], src[0], mask[0]);
	SET_MASKED(dst[1], src[1], mask[1]);
	SET_MASKED(dst[2], src[2], mask[2]);
}

static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
			const struct ovs_key_ethernet *key,
			const struct ovs_key_ethernet *mask)
{
	int err;

	err = skb_ensure_writable(skb, ETH_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
			       mask->eth_src);
	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
			       mask->eth_dst);

	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
	return 0;
}

static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
			__be32 *addr, __be32 new_addr)
{
	int transport_len = skb->len - skb_transport_offset(skb);

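	/* TCP and UDP checksums cover an IP pseudo-header, so rewriting an
	 * address means the transport checksum must be patched as well.  A
	 * zero UDP checksum means "no checksum" and is left alone unless the
	 * hardware will fill it in later (CHECKSUM_PARTIAL).
	 */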
	if (nh->protocol == IPPROTO_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
						 *addr, new_addr, 1);
	} else if (nh->protocol == IPPROTO_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace4(&uh->check, skb,
							 *addr, new_addr, 1);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	}

	csum_replace4(&nh->check, *addr, new_addr);
	skb_clear_hash(skb);
	*addr = new_addr;
}

static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
				 __be32 addr[4], const __be32 new_addr[4])
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (l4_proto == NEXTHDR_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
						  addr, new_addr, 1);
	} else if (l4_proto == NEXTHDR_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace16(&uh->check, skb,
							  addr, new_addr, 1);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	} else if (l4_proto == NEXTHDR_ICMP) {
		if (likely(transport_len >= sizeof(struct icmp6hdr)))
			inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
						  skb, addr, new_addr, 1);
	}
}

static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
			   const __be32 mask[4], __be32 masked[4])
{
	masked[0] = MASKED(old[0], addr[0], mask[0]);
	masked[1] = MASKED(old[1], addr[1], mask[1]);
	masked[2] = MASKED(old[2], addr[2], mask[2]);
	masked[3] = MASKED(old[3], addr[3], mask[3]);
}

static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
			  __be32 addr[4], const __be32 new_addr[4],
			  bool recalculate_csum)
{
	if (recalculate_csum)
		update_ipv6_checksum(skb, l4_proto, addr, new_addr);

	skb_clear_hash(skb);
	memcpy(addr, new_addr, sizeof(__be32[4]));
}

static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
{
	/* Bits 21-24 are always unmasked, so this retains their values. */
	SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
	SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
	SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
}

static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
		       u8 mask)
{
	new_ttl = MASKED(nh->ttl, new_ttl, mask);

	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
	nh->ttl = new_ttl;
}

static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv4 *key,
		    const struct ovs_key_ipv4 *mask)
{
	struct iphdr *nh;
	__be32 new_addr;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct iphdr));
	if (unlikely(err))
		return err;

	nh = ip_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (mask->ipv4_src) {
		new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);

		if (unlikely(new_addr != nh->saddr)) {
			set_ip_addr(skb, nh, &nh->saddr, new_addr);
			flow_key->ipv4.addr.src = new_addr;
		}
	}
	if (mask->ipv4_dst) {
		new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);

		if (unlikely(new_addr != nh->daddr)) {
			set_ip_addr(skb, nh, &nh->daddr, new_addr);
			flow_key->ipv4.addr.dst = new_addr;
		}
	}
	if (mask->ipv4_tos) {
		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
		flow_key->ip.tos = nh->tos;
	}
	if (mask->ipv4_ttl) {
		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
		flow_key->ip.ttl = nh->ttl;
	}

	return 0;
}

static bool is_ipv6_mask_nonzero(const __be32 addr[4])
{
	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
}

static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv6 *key,
		    const struct ovs_key_ipv6 *mask)
{
	struct ipv6hdr *nh;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct ipv6hdr));
	if (unlikely(err))
		return err;

	nh = ipv6_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
		__be32 *saddr = (__be32 *)&nh->saddr;
		__be32 masked[4];

		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);

		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
			set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
				      true);
			memcpy(&flow_key->ipv6.addr.src, masked,
			       sizeof(flow_key->ipv6.addr.src));
		}
	}
	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
		unsigned int offset = 0;
		int flags = IP6_FH_F_SKIP_RH;
		bool recalc_csum = true;
		__be32 *daddr = (__be32 *)&nh->daddr;
		__be32 masked[4];

		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);

		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
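			/* With a routing header the transport checksum is
			 * computed over the final destination carried in
			 * that header rather than over this address, so it
			 * must not be recalculated in that case.
			 */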
			if (ipv6_ext_hdr(nh->nexthdr))
				recalc_csum = (ipv6_find_hdr(skb, &offset,
							     NEXTHDR_ROUTING,
							     NULL, &flags)
					       != NEXTHDR_ROUTING);

			set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
				      recalc_csum);
			memcpy(&flow_key->ipv6.addr.dst, masked,
			       sizeof(flow_key->ipv6.addr.dst));
		}
	}
	if (mask->ipv6_tclass) {
		ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
		flow_key->ip.tos = ipv6_get_dsfield(nh);
	}
	if (mask->ipv6_label) {
		set_ipv6_fl(nh, ntohl(key->ipv6_label),
			    ntohl(mask->ipv6_label));
		flow_key->ipv6.label =
		    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
	}
	if (mask->ipv6_hlimit) {
		SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
		flow_key->ip.ttl = nh->hop_limit;
	}
	return 0;
}

/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
			__be16 new_port, __sum16 *check)
{
	inet_proto_csum_replace2(check, skb, *port, new_port, 0);
	*port = new_port;
}

static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_udp *key,
		   const struct ovs_key_udp *mask)
{
	struct udphdr *uh;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct udphdr));
	if (unlikely(err))
		return err;

	uh = udp_hdr(skb);
	/* At least one of the masks is non-zero, so do not bother checking
	 * them individually.
	 */
	src = MASKED(uh->source, key->udp_src, mask->udp_src);
	dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);

	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
		if (likely(src != uh->source)) {
			set_tp_port(skb, &uh->source, src, &uh->check);
			flow_key->tp.src = src;
		}
		if (likely(dst != uh->dest)) {
			set_tp_port(skb, &uh->dest, dst, &uh->check);
			flow_key->tp.dst = dst;
		}

		if (unlikely(!uh->check))
			uh->check = CSUM_MANGLED_0;
	} else {
		uh->source = src;
		uh->dest = dst;
		flow_key->tp.src = src;
		flow_key->tp.dst = dst;
	}

	skb_clear_hash(skb);

	return 0;
}

static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_tcp *key,
		   const struct ovs_key_tcp *mask)
{
	struct tcphdr *th;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct tcphdr));
	if (unlikely(err))
		return err;

	th = tcp_hdr(skb);
	src = MASKED(th->source, key->tcp_src, mask->tcp_src);
	if (likely(src != th->source)) {
		set_tp_port(skb, &th->source, src, &th->check);
		flow_key->tp.src = src;
	}
	dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
	if (likely(dst != th->dest)) {
		set_tp_port(skb, &th->dest, dst, &th->check);
		flow_key->tp.dst = dst;
	}
	skb_clear_hash(skb);

	return 0;
}

static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_sctp *key,
		    const struct ovs_key_sctp *mask)
{
	unsigned int sctphoff = skb_transport_offset(skb);
	struct sctphdr *sh;
	__le32 old_correct_csum, new_csum, old_csum;
	int err;

	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
	if (unlikely(err))
		return err;

	sh = sctp_hdr(skb);
	old_csum = sh->checksum;
	old_correct_csum = sctp_compute_cksum(skb, sctphoff);

	sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
	sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);

	new_csum = sctp_compute_cksum(skb, sctphoff);

	/* Carry any checksum errors through. */
	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

	skb_clear_hash(skb);
	flow_key->tp.src = sh->source;
	flow_key->tp.dst = sh->dest;

	return 0;
}

static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
	struct vport *vport = ovs_vport_rcu(dp, out_port);

	if (likely(vport))
		ovs_vport_send(vport, skb);
	else
		kfree_skb(skb);
}

static int output_userspace(struct datapath *dp, struct sk_buff *skb,
			    struct sw_flow_key *key, const struct nlattr *attr,
			    const struct nlattr *actions, int actions_len)
{
	struct ip_tunnel_info info;
	struct dp_upcall_info upcall;
	const struct nlattr *a;
	int rem;

	memset(&upcall, 0, sizeof(upcall));
	upcall.cmd = OVS_PACKET_CMD_ACTION;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
		 a = nla_next(a, &rem)) {
		switch (nla_type(a)) {
		case OVS_USERSPACE_ATTR_USERDATA:
			upcall.userdata = a;
			break;

		case OVS_USERSPACE_ATTR_PID:
			upcall.portid = nla_get_u32(a);
			break;

		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
			/* Get out tunnel info. */
			struct vport *vport;

			vport = ovs_vport_rcu(dp, nla_get_u32(a));
			if (vport) {
				int err;

				err = ovs_vport_get_egress_tun_info(vport, skb,
								    &info);
				if (!err)
					upcall.egress_tun_info = &info;
			}
			break;
		}

		case OVS_USERSPACE_ATTR_ACTIONS: {
			/* Include actions. */
			upcall.actions = actions;
			upcall.actions_len = actions_len;
			break;
		}

		} /* End of switch. */
	}

	return ovs_dp_upcall(dp, skb, key, &upcall);
}

static int sample(struct datapath *dp, struct sk_buff *skb,
		  struct sw_flow_key *key, const struct nlattr *attr,
		  const struct nlattr *actions, int actions_len)
{
	const struct nlattr *acts_list = NULL;
	const struct nlattr *a;
	int rem;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
		 a = nla_next(a, &rem)) {
		switch (nla_type(a)) {
		case OVS_SAMPLE_ATTR_PROBABILITY:
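			/* The probability is a fraction of UINT32_MAX; a
			 * random draw at or above it means this packet is
			 * not sampled.
			 */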
			if (prandom_u32() >= nla_get_u32(a))
				return 0;
			break;

		case OVS_SAMPLE_ATTR_ACTIONS:
			acts_list = a;
			break;
		}
	}

	rem = nla_len(acts_list);
	a = nla_data(acts_list);

	/* Actions list is empty, do nothing */
	if (unlikely(!rem))
		return 0;

	/* The only known usage of the sample action is having a single
	 * userspace action.  Treat this usage as a special case.
	 * output_userspace() must clone the skb it sends to user space,
	 * since the original skb will be consumed by this function's caller.
	 */
	if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
		   nla_is_last(a, rem)))
		return output_userspace(dp, skb, key, a, actions, actions_len);

	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		/* Skip the sample action when out of memory. */
		return 0;

	if (!add_deferred_actions(skb, key, a)) {
		if (net_ratelimit())
			pr_warn("%s: deferred actions limit reached, dropping sample action\n",
				ovs_dp_name(dp));

		kfree_skb(skb);
	}
	return 0;
}

static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
			 const struct nlattr *attr)
{
	struct ovs_action_hash *hash_act = nla_data(attr);
	u32 hash = 0;

	/* OVS_HASH_ALG_L4 is the only possible hash algorithm.  */
	hash = skb_get_hash(skb);
	hash = jhash_1word(hash, hash_act->hash_basis);
	if (!hash)
		hash = 0x1;

	key->ovs_flow_hash = hash;
}

static int execute_set_action(struct sk_buff *skb,
			      struct sw_flow_key *flow_key,
			      const struct nlattr *a)
{
	/* Only tunnel set execution is supported without a mask. */
	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
		struct ovs_tunnel_info *tun = nla_data(a);

		skb_dst_drop(skb);
		dst_hold((struct dst_entry *)tun->tun_dst);
		skb_dst_set(skb, (struct dst_entry *)tun->tun_dst);

		/* FIXME: Remove when all vports have been converted */
		OVS_CB(skb)->egress_tun_info = &tun->tun_dst->u.tun_info;

		return 0;
	}

	return -EINVAL;
}

/* The attribute payload holds the key followed by an equal-sized mask,
 * so the mask starts at the midpoint of the data.
 */
#define get_mask(a, type) ((const type)nla_data(a) + 1)

static int execute_masked_set_action(struct sk_buff *skb,
				     struct sw_flow_key *flow_key,
				     const struct nlattr *a)
{
	int err = 0;

	switch (nla_type(a)) {
	case OVS_KEY_ATTR_PRIORITY:
		SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *));
		flow_key->phy.priority = skb->priority;
		break;

	case OVS_KEY_ATTR_SKB_MARK:
		SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
		flow_key->phy.skb_mark = skb->mark;
		break;

	case OVS_KEY_ATTR_TUNNEL_INFO:
		/* Masked data not supported for tunnel. */
		err = -EINVAL;
		break;

	case OVS_KEY_ATTR_ETHERNET:
		err = set_eth_addr(skb, flow_key, nla_data(a),
				   get_mask(a, struct ovs_key_ethernet *));
		break;

	case OVS_KEY_ATTR_IPV4:
		err = set_ipv4(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv4 *));
		break;

	case OVS_KEY_ATTR_IPV6:
		err = set_ipv6(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv6 *));
		break;

	case OVS_KEY_ATTR_TCP:
		err = set_tcp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_tcp *));
		break;

	case OVS_KEY_ATTR_UDP:
		err = set_udp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_udp *));
		break;

	case OVS_KEY_ATTR_SCTP:
		err = set_sctp(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_sctp *));
		break;

	case OVS_KEY_ATTR_MPLS:
		err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
								    __be32 *));
		break;
	}

	return err;
}

static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
			  struct sw_flow_key *key,
			  const struct nlattr *a, int rem)
{
	struct deferred_action *da;

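	/* Recirculation performs a fresh flow-table lookup keyed on 'key',
	 * so refresh it from the packet if a previous action (e.g. an MPLS
	 * push or pop) invalidated it.
	 */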
	if (!is_flow_key_valid(key)) {
		int err;

		err = ovs_flow_key_update(skb, key);
		if (err)
			return err;
	}
	BUG_ON(!is_flow_key_valid(key));

	if (!nla_is_last(a, rem)) {
		/* The recirc action is not the last action of the action
		 * list, so the skb needs to be cloned.
		 */
		skb = skb_clone(skb, GFP_ATOMIC);

		/* Skip the recirc action when out of memory, but
		 * continue on with the rest of the action list.
		 */
		if (!skb)
			return 0;
	}

	da = add_deferred_actions(skb, key, NULL);
	if (da) {
		da->pkt_key.recirc_id = nla_get_u32(a);
	} else {
		kfree_skb(skb);

		if (net_ratelimit())
			pr_warn("%s: deferred action limit reached, drop recirc action\n",
				ovs_dp_name(dp));
	}

	return 0;
}

/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len)
{
	/* Every output action needs a separate clone of 'skb', but the common
	 * case is just a single output action, in which case doing a clone
	 * and then freeing the original skbuff would be wasteful.  So the
	 * following code is slightly obscure just to avoid that.
	 */
	int prev_port = -1;
	const struct nlattr *a;
	int rem;

	for (a = attr, rem = len; rem > 0;
	     a = nla_next(a, &rem)) {
		int err = 0;

		if (unlikely(prev_port != -1)) {
			struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);

			if (out_skb)
				do_output(dp, out_skb, prev_port);

			prev_port = -1;
		}

		switch (nla_type(a)) {
		case OVS_ACTION_ATTR_OUTPUT:
			prev_port = nla_get_u32(a);
			break;

		case OVS_ACTION_ATTR_USERSPACE:
			output_userspace(dp, skb, key, a, attr, len);
			break;

		case OVS_ACTION_ATTR_HASH:
			execute_hash(skb, key, a);
			break;

		case OVS_ACTION_ATTR_PUSH_MPLS:
			err = push_mpls(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_MPLS:
			err = pop_mpls(skb, key, nla_get_be16(a));
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			err = push_vlan(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_VLAN:
			err = pop_vlan(skb, key);
			break;

		case OVS_ACTION_ATTR_RECIRC:
			err = execute_recirc(dp, skb, key, a, rem);
			if (nla_is_last(a, rem)) {
				/* If this is the last action, the skb has
				 * been consumed or freed.
				 * Return immediately.
				 */
				return err;
			}
			break;

		case OVS_ACTION_ATTR_SET:
			err = execute_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SET_MASKED:
		case OVS_ACTION_ATTR_SET_TO_MASKED:
			err = execute_masked_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SAMPLE:
			err = sample(dp, skb, key, a, attr, len);
			break;
		}

		if (unlikely(err)) {
			kfree_skb(skb);
			return err;
		}
	}

	if (prev_port != -1)
		do_output(dp, skb, prev_port);
	else
		consume_skb(skb);

	return 0;
}

static void process_deferred_actions(struct datapath *dp)
{
	struct action_fifo *fifo = this_cpu_ptr(action_fifos);

	/* Do not touch the FIFO if there are no deferred actions. */
	if (action_fifo_is_empty(fifo))
		return;

	/* Finish executing all deferred actions. */
	do {
		struct deferred_action *da = action_fifo_get(fifo);
		struct sk_buff *skb = da->skb;
		struct sw_flow_key *key = &da->pkt_key;
		const struct nlattr *actions = da->actions;

		if (actions)
			do_execute_actions(dp, skb, key, actions,
					   nla_len(actions));
		else
			ovs_dp_process_packet(skb, key);
	} while (!action_fifo_is_empty(fifo));

	/* Reset FIFO for the next packet.  */
	action_fifo_init(fifo);
}

/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
			const struct sw_flow_actions *acts,
			struct sw_flow_key *key)
{
	int level = this_cpu_read(exec_actions_level);
	int err;

	this_cpu_inc(exec_actions_level);
	OVS_CB(skb)->egress_tun_info = NULL;
	err = do_execute_actions(dp, skb, key,
				 acts->actions, acts->actions_len);

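	/* Only the outermost call on this CPU drains the deferred action
	 * fifo; nested calls merely queue onto it.
	 */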
	if (!level)
		process_deferred_actions(dp);

	this_cpu_dec(exec_actions_level);
	return err;
}

int action_fifos_init(void)
{
	action_fifos = alloc_percpu(struct action_fifo);
	if (!action_fifos)
		return -ENOMEM;

	return 0;
}

void action_fifos_exit(void)
{
	free_percpu(action_fifos);
}