/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>

#include <net/ip.h>
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
#include <net/sctp/checksum.h>

#include "datapath.h"
#include "flow.h"
#include "vport.h"

static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len);

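/* Actions that cannot run inline (queued by the sample and recirculation
 * actions) are stored on a per-CPU FIFO and executed once the outermost
 * action list has finished, which bounds the kernel stack depth instead
 * of recursing.
 */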
struct deferred_action {
	struct sk_buff *skb;
	const struct nlattr *actions;

	/* Store pkt_key clone when creating deferred action. */
	struct sw_flow_key pkt_key;
};

#define DEFERRED_ACTION_FIFO_SIZE 10
struct action_fifo {
	int head;
	int tail;
	/* Deferred action fifo queue storage. */
	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};

static struct action_fifo __percpu *action_fifos;
static DEFINE_PER_CPU(int, exec_actions_level);

static void action_fifo_init(struct action_fifo *fifo)
{
	fifo->head = 0;
	fifo->tail = 0;
}

static bool action_fifo_is_empty(const struct action_fifo *fifo)
{
	return (fifo->head == fifo->tail);
}

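/* Dequeue the oldest deferred action, or return NULL if the FIFO is empty. */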
static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
{
	if (action_fifo_is_empty(fifo))
		return NULL;

	return &fifo->fifo[fifo->tail++];
}

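/* Reserve the next free slot, or return NULL if the FIFO is full. */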
static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
{
	if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
		return NULL;

	return &fifo->fifo[fifo->head++];
}

/* Queue an skb and its pending actions; returns NULL if the FIFO is full. */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
						    const struct sw_flow_key *key,
						    const struct nlattr *attr)
{
	struct action_fifo *fifo;
	struct deferred_action *da;

	fifo = this_cpu_ptr(action_fifos);
	da = action_fifo_put(fifo);
	if (da) {
		da->skb = skb;
		da->actions = attr;
		da->pkt_key = *key;
	}

	return da;
}

static void invalidate_flow_key(struct sw_flow_key *key)
{
	key->eth.type = htons(0);
}

static bool is_flow_key_valid(const struct sw_flow_key *key)
{
	return !!key->eth.type;
}

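/* Push an MPLS label stack entry onto 'skb', rewrite the Ethertype and
 * invalidate the flow key, since the packet contents have changed.
 */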
static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_mpls *mpls)
{
	__be32 *new_mpls_lse;
	struct ethhdr *hdr;

	/* The networking stack does not allow simultaneous tunnel and
	 * MPLS GSO.
	 */
	if (skb->encapsulation)
		return -ENOTSUPP;

	if (skb_cow_head(skb, MPLS_HLEN) < 0)
		return -ENOMEM;

	skb_push(skb, MPLS_HLEN);
	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);
	skb_reset_mac_header(skb);

	new_mpls_lse = (__be32 *)skb_mpls_header(skb);
	*new_mpls_lse = mpls->mpls_lse;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
							     MPLS_HLEN, 0));

	hdr = eth_hdr(skb);
	hdr->h_proto = mpls->mpls_ethertype;

	if (!skb->inner_protocol)
		skb_set_inner_protocol(skb, skb->protocol);
	skb->protocol = mpls->mpls_ethertype;

	invalidate_flow_key(key);
	return 0;
}

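/* Pop the outermost MPLS label stack entry from 'skb' and restore the given
 * Ethertype; the flow key is invalidated for the same reason as push_mpls().
 */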
static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		    const __be16 ethertype)
{
	struct ethhdr *hdr;
	int err;

	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN);

	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);

	__skb_pull(skb, MPLS_HLEN);
	skb_reset_mac_header(skb);

	/* skb_mpls_header() is used to locate the ethertype
	 * field correctly in the presence of VLAN tags.
	 */
	hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
	hdr->h_proto = ethertype;
	if (eth_p_mpls(skb->protocol))
		skb->protocol = ethertype;

	invalidate_flow_key(key);
	return 0;
}

/* 'KEY' must not have any bits set outside of the 'MASK' */
#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK))

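/* Rewrite the topmost MPLS label stack entry under 'mask', keeping any
 * CHECKSUM_COMPLETE value up to date.
 */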
static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const __be32 *mpls_lse, const __be32 *mask)
{
	__be32 *stack;
	__be32 lse;
	int err;

	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	stack = (__be32 *)skb_mpls_header(skb);
	lse = MASKED(*stack, *mpls_lse, *mask);
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		__be32 diff[] = { ~(*stack), lse };

		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
					  ~skb->csum);
	}

	*stack = lse;
	flow_key->mpls.top_lse = lse;
	return 0;
}

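/* Strip the outermost VLAN tag; a remaining inner tag invalidates the key. */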
static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = skb_vlan_pop(skb);
	if (skb_vlan_tag_present(skb))
		invalidate_flow_key(key);
	else
		key->eth.tci = 0;
	return err;
}

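/* Add a VLAN tag to 'skb'; an already-present tag invalidates the flow key. */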
static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_vlan *vlan)
{
	if (skb_vlan_tag_present(skb))
		invalidate_flow_key(key);
	else
		key->eth.tci = vlan->vlan_tci;
	return skb_vlan_push(skb, vlan->vlan_tpid,
			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
}

/* 'src' is already properly masked. */
static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
{
	u16 *dst = (u16 *)dst_;
	const u16 *src = (const u16 *)src_;
	const u16 *mask = (const u16 *)mask_;

	SET_MASKED(dst[0], src[0], mask[0]);
	SET_MASKED(dst[1], src[1], mask[1]);
	SET_MASKED(dst[2], src[2], mask[2]);
}

static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
			const struct ovs_key_ethernet *key,
			const struct ovs_key_ethernet *mask)
{
	int err;

	err = skb_ensure_writable(skb, ETH_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
			       mask->eth_src);
	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
			       mask->eth_dst);

	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
	return 0;
}

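/* Rewrite an IPv4 address and fix up the IP checksum plus any TCP or UDP
 * checksum that covers the pseudo-header.
 */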
static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
			__be32 *addr, __be32 new_addr)
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (nh->protocol == IPPROTO_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
						 *addr, new_addr, 1);
	} else if (nh->protocol == IPPROTO_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace4(&uh->check, skb,
							 *addr, new_addr, 1);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	}

	csum_replace4(&nh->check, *addr, new_addr);
	skb_clear_hash(skb);
	*addr = new_addr;
}

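/* Fix up the TCP, UDP or ICMPv6 checksum for a rewritten IPv6 address. */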
static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
				 __be32 addr[4], const __be32 new_addr[4])
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (l4_proto == NEXTHDR_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
						  addr, new_addr, 1);
	} else if (l4_proto == NEXTHDR_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace16(&uh->check, skb,
							  addr, new_addr, 1);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	} else if (l4_proto == NEXTHDR_ICMP) {
		if (likely(transport_len >= sizeof(struct icmp6hdr)))
			inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
						  skb, addr, new_addr, 1);
	}
}

static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
			   const __be32 mask[4], __be32 masked[4])
{
	masked[0] = MASKED(old[0], addr[0], mask[0]);
	masked[1] = MASKED(old[1], addr[1], mask[1]);
	masked[2] = MASKED(old[2], addr[2], mask[2]);
	masked[3] = MASKED(old[3], addr[3], mask[3]);
}

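/* Install a new IPv6 address, optionally updating the L4 checksum first. */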
static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
			  __be32 addr[4], const __be32 new_addr[4],
			  bool recalculate_csum)
{
	if (recalculate_csum)
		update_ipv6_checksum(skb, l4_proto, addr, new_addr);

	skb_clear_hash(skb);
	memcpy(addr, new_addr, sizeof(__be32[4]));
}

static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
{
	/* Bits 21-24 are always unmasked, so this retains their values. */
	SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
	SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
	SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
}

static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
		       u8 mask)
{
	new_ttl = MASKED(nh->ttl, new_ttl, mask);

	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
	nh->ttl = new_ttl;
}

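/* Apply a masked OVS_KEY_ATTR_IPV4 set action and mirror the result into
 * 'flow_key'.
 */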
static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv4 *key,
		    const struct ovs_key_ipv4 *mask)
{
	struct iphdr *nh;
	__be32 new_addr;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct iphdr));
	if (unlikely(err))
		return err;

	nh = ip_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (mask->ipv4_src) {
		new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);

		if (unlikely(new_addr != nh->saddr)) {
			set_ip_addr(skb, nh, &nh->saddr, new_addr);
			flow_key->ipv4.addr.src = new_addr;
		}
	}
	if (mask->ipv4_dst) {
		new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);

		if (unlikely(new_addr != nh->daddr)) {
			set_ip_addr(skb, nh, &nh->daddr, new_addr);
			flow_key->ipv4.addr.dst = new_addr;
		}
	}
	if (mask->ipv4_tos) {
		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
		flow_key->ip.tos = nh->tos;
	}
	if (mask->ipv4_ttl) {
		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
		flow_key->ip.ttl = nh->ttl;
	}

	return 0;
}

static bool is_ipv6_mask_nonzero(const __be32 addr[4])
{
	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
}

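/* Apply a masked OVS_KEY_ATTR_IPV6 set action; when a routing extension
 * header is present the destination rewrite skips the L4 checksum update,
 * since that checksum is computed over the final destination.
 */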
static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv6 *key,
		    const struct ovs_key_ipv6 *mask)
{
	struct ipv6hdr *nh;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct ipv6hdr));
	if (unlikely(err))
		return err;

	nh = ipv6_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
		__be32 *saddr = (__be32 *)&nh->saddr;
		__be32 masked[4];

		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);

		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
			set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
				      true);
			memcpy(&flow_key->ipv6.addr.src, masked,
			       sizeof(flow_key->ipv6.addr.src));
		}
	}
	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
		unsigned int offset = 0;
		int flags = IP6_FH_F_SKIP_RH;
		bool recalc_csum = true;
		__be32 *daddr = (__be32 *)&nh->daddr;
		__be32 masked[4];

		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);

		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
			if (ipv6_ext_hdr(nh->nexthdr))
				recalc_csum = (ipv6_find_hdr(skb, &offset,
							     NEXTHDR_ROUTING,
							     NULL, &flags)
					       != NEXTHDR_ROUTING);

			set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
				      recalc_csum);
			memcpy(&flow_key->ipv6.addr.dst, masked,
			       sizeof(flow_key->ipv6.addr.dst));
		}
	}
	if (mask->ipv6_tclass) {
		ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
		flow_key->ip.tos = ipv6_get_dsfield(nh);
	}
	if (mask->ipv6_label) {
		set_ipv6_fl(nh, ntohl(key->ipv6_label),
			    ntohl(mask->ipv6_label));
		flow_key->ipv6.label =
		    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
	}
	if (mask->ipv6_hlimit) {
		SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
		flow_key->ip.ttl = nh->hop_limit;
	}
	return 0;
}

/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
			__be16 new_port, __sum16 *check)
{
	inet_proto_csum_replace2(check, skb, *port, new_port, 0);
	*port = new_port;
}

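/* Apply a masked UDP port set action; a zero (absent) UDP checksum is left
 * untouched.
 */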
static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_udp *key,
		   const struct ovs_key_udp *mask)
{
	struct udphdr *uh;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct udphdr));
	if (unlikely(err))
		return err;

	uh = udp_hdr(skb);
	/* Either of the masks is non-zero, so do not bother checking them. */
	src = MASKED(uh->source, key->udp_src, mask->udp_src);
	dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);

	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
		if (likely(src != uh->source)) {
			set_tp_port(skb, &uh->source, src, &uh->check);
			flow_key->tp.src = src;
		}
		if (likely(dst != uh->dest)) {
			set_tp_port(skb, &uh->dest, dst, &uh->check);
			flow_key->tp.dst = dst;
		}

		if (unlikely(!uh->check))
			uh->check = CSUM_MANGLED_0;
	} else {
		uh->source = src;
		uh->dest = dst;
		flow_key->tp.src = src;
		flow_key->tp.dst = dst;
	}

	skb_clear_hash(skb);

	return 0;
}

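/* Apply a masked TCP port set action, updating the TCP checksum. */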
static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_tcp *key,
		   const struct ovs_key_tcp *mask)
{
	struct tcphdr *th;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct tcphdr));
	if (unlikely(err))
		return err;

	th = tcp_hdr(skb);
	src = MASKED(th->source, key->tcp_src, mask->tcp_src);
	if (likely(src != th->source)) {
		set_tp_port(skb, &th->source, src, &th->check);
		flow_key->tp.src = src;
	}
	dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
	if (likely(dst != th->dest)) {
		set_tp_port(skb, &th->dest, dst, &th->check);
		flow_key->tp.dst = dst;
	}
	skb_clear_hash(skb);

	return 0;
}

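/* Apply a masked SCTP port set action; the CRC32c checksum is recomputed so
 * that any pre-existing checksum error is preserved.
 */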
static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_sctp *key,
		    const struct ovs_key_sctp *mask)
{
	unsigned int sctphoff = skb_transport_offset(skb);
	struct sctphdr *sh;
	__le32 old_correct_csum, new_csum, old_csum;
	int err;

	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
	if (unlikely(err))
		return err;

	sh = sctp_hdr(skb);
	old_csum = sh->checksum;
	old_correct_csum = sctp_compute_cksum(skb, sctphoff);

	sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
	sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);

	new_csum = sctp_compute_cksum(skb, sctphoff);

	/* Carry any checksum errors through. */
	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

	skb_clear_hash(skb);
	flow_key->tp.src = sh->source;
	flow_key->tp.dst = sh->dest;

	return 0;
}

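/* Transmit 'skb' on 'out_port', or free it if the vport has disappeared. */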
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
	struct vport *vport = ovs_vport_rcu(dp, out_port);

	if (likely(vport))
		ovs_vport_send(vport, skb);
	else
		kfree_skb(skb);
}

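/* Send 'skb' to userspace as an OVS_PACKET_CMD_ACTION upcall, attaching any
 * userdata, egress tunnel info and actions requested by 'attr'.
 */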
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
			    struct sw_flow_key *key, const struct nlattr *attr,
			    const struct nlattr *actions, int actions_len)
{
	struct ip_tunnel_info info;
	struct dp_upcall_info upcall;
	const struct nlattr *a;
	int rem;

	memset(&upcall, 0, sizeof(upcall));
	upcall.cmd = OVS_PACKET_CMD_ACTION;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
		 a = nla_next(a, &rem)) {
		switch (nla_type(a)) {
		case OVS_USERSPACE_ATTR_USERDATA:
			upcall.userdata = a;
			break;

		case OVS_USERSPACE_ATTR_PID:
			upcall.portid = nla_get_u32(a);
			break;

		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
			/* Get out tunnel info. */
			struct vport *vport;

			vport = ovs_vport_rcu(dp, nla_get_u32(a));
			if (vport) {
				int err;

				err = ovs_vport_get_egress_tun_info(vport, skb,
								    &info);
				if (!err)
					upcall.egress_tun_info = &info;
			}
			break;
		}

		case OVS_USERSPACE_ATTR_ACTIONS: {
			/* Include actions. */
			upcall.actions = actions;
			upcall.actions_len = actions_len;
			break;
		}

		} /* End of switch. */
	}

	return ovs_dp_upcall(dp, skb, key, &upcall);
}

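/* Probabilistically execute the nested action list of a sample action. */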
static int sample(struct datapath *dp, struct sk_buff *skb,
		  struct sw_flow_key *key, const struct nlattr *attr,
		  const struct nlattr *actions, int actions_len)
{
	const struct nlattr *acts_list = NULL;
	const struct nlattr *a;
	int rem;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
		 a = nla_next(a, &rem)) {
		switch (nla_type(a)) {
		case OVS_SAMPLE_ATTR_PROBABILITY:
			if (prandom_u32() >= nla_get_u32(a))
				return 0;
			break;

		case OVS_SAMPLE_ATTR_ACTIONS:
			acts_list = a;
			break;
		}
	}

	rem = nla_len(acts_list);
	a = nla_data(acts_list);

	/* Actions list is empty, do nothing */
	if (unlikely(!rem))
		return 0;

	/* The only known usage of sample action is having a single user-space
	 * action. Treat this usage as a special case.
	 * output_userspace() should clone the skb to be sent to user space,
	 * and this skb will be consumed by its caller.
	 */
	if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
		   nla_is_last(a, rem)))
		return output_userspace(dp, skb, key, a, actions, actions_len);

	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		/* Skip the sample action when out of memory. */
		return 0;

	if (!add_deferred_actions(skb, key, a)) {
		if (net_ratelimit())
			pr_warn("%s: deferred actions limit reached, dropping sample action\n",
				ovs_dp_name(dp));

		kfree_skb(skb);
	}
	return 0;
}

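/* Compute an L4 flow hash over 'skb' and store it in the flow key. */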
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
			 const struct nlattr *attr)
{
	struct ovs_action_hash *hash_act = nla_data(attr);
	u32 hash = 0;

	/* OVS_HASH_ALG_L4 is the only possible hash algorithm.  */
	hash = skb_get_hash(skb);
	hash = jhash_1word(hash, hash_act->hash_basis);
	if (!hash)
		hash = 0x1;

	key->ovs_flow_hash = hash;
}

static int execute_set_action(struct sk_buff *skb,
			      struct sw_flow_key *flow_key,
			      const struct nlattr *a)
{
	/* Only tunnel set execution is supported without a mask. */
	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
		OVS_CB(skb)->egress_tun_info = nla_data(a);
		return 0;
	}

	return -EINVAL;
}

/* Mask is at the midpoint of the data. */
#define get_mask(a, type) ((const type)nla_data(a) + 1)

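/* Dispatch a masked set action to the appropriate per-protocol helper. */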
static int execute_masked_set_action(struct sk_buff *skb,
				     struct sw_flow_key *flow_key,
				     const struct nlattr *a)
{
	int err = 0;

	switch (nla_type(a)) {
	case OVS_KEY_ATTR_PRIORITY:
		SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *));
		flow_key->phy.priority = skb->priority;
		break;

	case OVS_KEY_ATTR_SKB_MARK:
		SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
		flow_key->phy.skb_mark = skb->mark;
		break;

	case OVS_KEY_ATTR_TUNNEL_INFO:
		/* Masked data not supported for tunnel. */
		err = -EINVAL;
		break;

	case OVS_KEY_ATTR_ETHERNET:
		err = set_eth_addr(skb, flow_key, nla_data(a),
				   get_mask(a, struct ovs_key_ethernet *));
		break;

	case OVS_KEY_ATTR_IPV4:
		err = set_ipv4(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv4 *));
		break;

	case OVS_KEY_ATTR_IPV6:
		err = set_ipv6(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv6 *));
		break;

	case OVS_KEY_ATTR_TCP:
		err = set_tcp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_tcp *));
		break;

	case OVS_KEY_ATTR_UDP:
		err = set_udp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_udp *));
		break;

	case OVS_KEY_ATTR_SCTP:
		err = set_sctp(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_sctp *));
		break;

	case OVS_KEY_ATTR_MPLS:
		err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
								    __be32 *));
		break;
	}

	return err;
}

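/* Queue 'skb' for another pass through the flow table with a new recirc_id.
 * The packet is deferred rather than processed immediately to avoid
 * unbounded recursion.
 */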
static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
			  struct sw_flow_key *key,
			  const struct nlattr *a, int rem)
{
	struct deferred_action *da;

	if (!is_flow_key_valid(key)) {
		int err;

		err = ovs_flow_key_update(skb, key);
		if (err)
			return err;
	}
	BUG_ON(!is_flow_key_valid(key));

	if (!nla_is_last(a, rem)) {
		/* The recirc action is not the last action of the action
		 * list, so the skb needs to be cloned.
		 */
		skb = skb_clone(skb, GFP_ATOMIC);

		/* Skip the recirc action when out of memory, but
		 * continue on with the rest of the action list.
		 */
		if (!skb)
			return 0;
	}

	da = add_deferred_actions(skb, key, NULL);
	if (da) {
		da->pkt_key.recirc_id = nla_get_u32(a);
	} else {
		kfree_skb(skb);

		if (net_ratelimit())
			pr_warn("%s: deferred action limit reached, drop recirc action\n",
				ovs_dp_name(dp));
	}

	return 0;
}

/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len)
{
	/* Every output action needs a separate clone of 'skb', but the common
	 * case is just a single output action, so that doing a clone and
	 * then freeing the original skbuff is wasteful.  So the following code
	 * is slightly obscure just to avoid that.
	 */
	int prev_port = -1;
	const struct nlattr *a;
	int rem;

	for (a = attr, rem = len; rem > 0;
	     a = nla_next(a, &rem)) {
		int err = 0;

		if (unlikely(prev_port != -1)) {
			struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);

			if (out_skb)
				do_output(dp, out_skb, prev_port);

			prev_port = -1;
		}

		switch (nla_type(a)) {
		case OVS_ACTION_ATTR_OUTPUT:
			prev_port = nla_get_u32(a);
			break;

		case OVS_ACTION_ATTR_USERSPACE:
			output_userspace(dp, skb, key, a, attr, len);
			break;

		case OVS_ACTION_ATTR_HASH:
			execute_hash(skb, key, a);
			break;

		case OVS_ACTION_ATTR_PUSH_MPLS:
			err = push_mpls(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_MPLS:
			err = pop_mpls(skb, key, nla_get_be16(a));
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			err = push_vlan(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_VLAN:
			err = pop_vlan(skb, key);
			break;

		case OVS_ACTION_ATTR_RECIRC:
			err = execute_recirc(dp, skb, key, a, rem);
			if (nla_is_last(a, rem)) {
				/* If this is the last action, the skb has
				 * been consumed or freed.
				 * Return immediately.
				 */
				return err;
			}
			break;

		case OVS_ACTION_ATTR_SET:
			err = execute_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SET_MASKED:
		case OVS_ACTION_ATTR_SET_TO_MASKED:
			err = execute_masked_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SAMPLE:
			err = sample(dp, skb, key, a, attr, len);
			break;
		}

		if (unlikely(err)) {
			kfree_skb(skb);
			return err;
		}
	}

	if (prev_port != -1)
		do_output(dp, skb, prev_port);
	else
		consume_skb(skb);

	return 0;
}

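/* Drain the per-CPU FIFO filled by sample and recirc actions. */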
static void process_deferred_actions(struct datapath *dp)
{
	struct action_fifo *fifo = this_cpu_ptr(action_fifos);

	/* Do not touch the FIFO if there are no deferred actions. */
	if (action_fifo_is_empty(fifo))
		return;

	/* Finish executing all deferred actions. */
	do {
		struct deferred_action *da = action_fifo_get(fifo);
		struct sk_buff *skb = da->skb;
		struct sw_flow_key *key = &da->pkt_key;
		const struct nlattr *actions = da->actions;

		if (actions)
			do_execute_actions(dp, skb, key, actions,
					   nla_len(actions));
		else
			ovs_dp_process_packet(skb, key);
	} while (!action_fifo_is_empty(fifo));

	/* Reset FIFO for the next packet.  */
	action_fifo_init(fifo);
}

/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
			const struct sw_flow_actions *acts,
			struct sw_flow_key *key)
{
	int level = this_cpu_read(exec_actions_level);
	int err;

	this_cpu_inc(exec_actions_level);
	OVS_CB(skb)->egress_tun_info = NULL;
	err = do_execute_actions(dp, skb, key,
				 acts->actions, acts->actions_len);

	if (!level)
		process_deferred_actions(dp);

	this_cpu_dec(exec_actions_level);
	return err;
}

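/* Allocate the per-CPU deferred-action FIFOs. */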
int action_fifos_init(void)
{
	action_fifos = alloc_percpu(struct action_fifo);
	if (!action_fifos)
		return -ENOMEM;

	return 0;
}

void action_fifos_exit(void)
{
	free_percpu(action_fifos);
}