/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/netfilter_ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>

#include <net/dst.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
#include <net/sctp/checksum.h>

#include "datapath.h"
#include "flow.h"
#include "conntrack.h"
#include "vport.h"

static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len);

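/* Actions whose execution must be postponed (currently recirculation and
 * sampled action lists) are queued as deferred actions on a per-CPU fifo
 * and run once the current action list has finished executing.
 */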
struct deferred_action {
	struct sk_buff *skb;
	const struct nlattr *actions;
	int actions_len;

	/* Store pkt_key clone when creating deferred action. */
	struct sw_flow_key pkt_key;
};

#define MAX_L2_LEN	(VLAN_ETH_HLEN + 3 * MPLS_HLEN)
struct ovs_frag_data {
	unsigned long dst;
	struct vport *vport;
	struct ovs_skb_cb cb;
	__be16 inner_protocol;
	u16 network_offset;	/* valid only for MPLS */
	u16 vlan_tci;
	__be16 vlan_proto;
	unsigned int l2_len;
	u8 mac_proto;
	u8 l2_data[MAX_L2_LEN];
};

static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);

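/* Action execution can nest, e.g. via recirculation or sampled action
 * lists.  Up to OVS_DEFERRED_ACTION_THRESHOLD levels each get their own
 * per-CPU recirculation key; beyond that, recirculated packets are queued
 * on the deferred-action fifo instead, bounding kernel stack usage.
 */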
#define DEFERRED_ACTION_FIFO_SIZE 10
#define OVS_RECURSION_LIMIT 5
#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
struct action_fifo {
	int head;
	int tail;
	/* Deferred action fifo queue storage. */
	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};

struct recirc_keys {
	struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
};

static struct action_fifo __percpu *action_fifos;
static struct recirc_keys __percpu *recirc_keys;
static DEFINE_PER_CPU(int, exec_actions_level);

static void action_fifo_init(struct action_fifo *fifo)
{
	fifo->head = 0;
	fifo->tail = 0;
}

static bool action_fifo_is_empty(const struct action_fifo *fifo)
{
	return (fifo->head == fifo->tail);
}

static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
{
	if (action_fifo_is_empty(fifo))
		return NULL;

	return &fifo->fifo[fifo->tail++];
}

static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
{
	if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
		return NULL;

	return &fifo->fifo[fifo->head++];
}

/* Return the queued entry, or NULL if the fifo is full. */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
				    const struct sw_flow_key *key,
				    const struct nlattr *actions,
				    const int actions_len)
{
	struct action_fifo *fifo;
	struct deferred_action *da;

	fifo = this_cpu_ptr(action_fifos);
	da = action_fifo_put(fifo);
	if (da) {
		da->skb = skb;
		da->actions = actions;
		da->actions_len = actions_len;
		da->pkt_key = *key;
	}

	return da;
}

static void invalidate_flow_key(struct sw_flow_key *key)
{
	key->mac_proto |= SW_FLOW_KEY_INVALID;
}

static bool is_flow_key_valid(const struct sw_flow_key *key)
{
	return !(key->mac_proto & SW_FLOW_KEY_INVALID);
}

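/* For CHECKSUM_COMPLETE packets the ethertype bytes are assumed to be
 * covered by skb->csum, so rewriting h_proto folds the change into the
 * stored checksum: the diff[] pair removes the old 16-bit value and adds
 * the new one.
 */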
static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
			     __be16 ethertype)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		__be16 diff[] = { ~(hdr->h_proto), ethertype };

		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
					~skb->csum);
	}

	hdr->h_proto = ethertype;
}

static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_mpls *mpls)
{
	struct mpls_shim_hdr *new_mpls_lse;

	/* The networking stack does not allow simultaneous tunnel and MPLS GSO. */
	if (skb->encapsulation)
		return -ENOTSUPP;

	if (skb_cow_head(skb, MPLS_HLEN) < 0)
		return -ENOMEM;

	if (!skb->inner_protocol) {
		skb_set_inner_network_header(skb, skb->mac_len);
		skb_set_inner_protocol(skb, skb->protocol);
	}

	skb_push(skb, MPLS_HLEN);
	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, skb->mac_len);

	new_mpls_lse = mpls_hdr(skb);
	new_mpls_lse->label_stack_entry = mpls->mpls_lse;

	skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);

	if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET)
		update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
	skb->protocol = mpls->mpls_ethertype;

	invalidate_flow_key(key);
	return 0;
}

static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		    const __be16 ethertype)
{
	int err;

	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);

	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);

	__skb_pull(skb, MPLS_HLEN);
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, skb->mac_len);

	if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
		struct ethhdr *hdr;

		/* mpls_hdr() is used to locate the ethertype field correctly in the
		 * presence of VLAN tags.
		 */
		hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
		update_ethertype(skb, hdr, ethertype);
	}
	if (eth_p_mpls(skb->protocol))
		skb->protocol = ethertype;

	invalidate_flow_key(key);
	return 0;
}

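/* Rewrite the topmost MPLS label stack entry under mask, keeping
 * skb->csum consistent for CHECKSUM_COMPLETE packets.
 */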
static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const __be32 *mpls_lse, const __be32 *mask)
{
	struct mpls_shim_hdr *stack;
	__be32 lse;
	int err;

	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	stack = mpls_hdr(skb);
	lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		__be32 diff[] = { ~(stack->label_stack_entry), lse };

		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
					  ~skb->csum);
	}

	stack->label_stack_entry = lse;
	flow_key->mpls.top_lse = lse;
	return 0;
}

static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = skb_vlan_pop(skb);
	if (skb_vlan_tag_present(skb)) {
		invalidate_flow_key(key);
	} else {
		key->eth.vlan.tci = 0;
		key->eth.vlan.tpid = 0;
	}
	return err;
}

static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_vlan *vlan)
{
	if (skb_vlan_tag_present(skb)) {
		invalidate_flow_key(key);
	} else {
		key->eth.vlan.tci = vlan->vlan_tci;
		key->eth.vlan.tpid = vlan->vlan_tpid;
	}
	return skb_vlan_push(skb, vlan->vlan_tpid,
			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
}

/* 'src' is already properly masked. */
static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
{
	u16 *dst = (u16 *)dst_;
	const u16 *src = (const u16 *)src_;
	const u16 *mask = (const u16 *)mask_;

	OVS_SET_MASKED(dst[0], src[0], mask[0]);
	OVS_SET_MASKED(dst[1], src[1], mask[1]);
	OVS_SET_MASKED(dst[2], src[2], mask[2]);
}

static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
			const struct ovs_key_ethernet *key,
			const struct ovs_key_ethernet *mask)
{
	int err;

	err = skb_ensure_writable(skb, ETH_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
			       mask->eth_src);
	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
			       mask->eth_dst);

	skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
	return 0;
}

/* pop_eth does not support VLAN packets as this action is never called
 * for them.
 */
static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
{
	skb_pull_rcsum(skb, ETH_HLEN);
	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_NONE;
	invalidate_flow_key(key);
	return 0;
}

static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
		    const struct ovs_action_push_eth *ethh)
{
	struct ethhdr *hdr;

	/* Add the new Ethernet header */
	if (skb_cow_head(skb, ETH_HLEN) < 0)
		return -ENOMEM;

	skb_push(skb, ETH_HLEN);
	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	hdr = eth_hdr(skb);
	ether_addr_copy(hdr->h_source, ethh->addresses.eth_src);
	ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst);
	hdr->h_proto = skb->protocol;

	skb_postpush_rcsum(skb, hdr, ETH_HLEN);

	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_ETHERNET;
	invalidate_flow_key(key);
	return 0;
}

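/* Rewrite the TCP/UDP checksum for an IPv4 address change.  Non-first
 * fragments carry no L4 header and are left untouched; a UDP checksum of
 * zero means "no checksum" and is likewise preserved.
 */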
static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
				  __be32 addr, __be32 new_addr)
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (nh->frag_off & htons(IP_OFFSET))
		return;

	if (nh->protocol == IPPROTO_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
						 addr, new_addr, true);
	} else if (nh->protocol == IPPROTO_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace4(&uh->check, skb,
							 addr, new_addr, true);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	}
}

static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
			__be32 *addr, __be32 new_addr)
{
	update_ip_l4_checksum(skb, nh, *addr, new_addr);
	csum_replace4(&nh->check, *addr, new_addr);
	skb_clear_hash(skb);
	*addr = new_addr;
}

static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
				 __be32 addr[4], const __be32 new_addr[4])
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (l4_proto == NEXTHDR_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
						  addr, new_addr, true);
	} else if (l4_proto == NEXTHDR_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace16(&uh->check, skb,
							  addr, new_addr, true);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	} else if (l4_proto == NEXTHDR_ICMP) {
		if (likely(transport_len >= sizeof(struct icmp6hdr)))
			inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
						  skb, addr, new_addr, true);
	}
}

static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
			   const __be32 mask[4], __be32 masked[4])
{
	masked[0] = OVS_MASKED(old[0], addr[0], mask[0]);
	masked[1] = OVS_MASKED(old[1], addr[1], mask[1]);
	masked[2] = OVS_MASKED(old[2], addr[2], mask[2]);
	masked[3] = OVS_MASKED(old[3], addr[3], mask[3]);
}

static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
			  __be32 addr[4], const __be32 new_addr[4],
			  bool recalculate_csum)
{
	if (recalculate_csum)
		update_ipv6_checksum(skb, l4_proto, addr, new_addr);

	skb_clear_hash(skb);
	memcpy(addr, new_addr, sizeof(__be32[4]));
}

static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
{
	/* Bits 21-24 are always unmasked, so this retains their values. */
	OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
	OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
	OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
}

static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
		       u8 mask)
{
	new_ttl = OVS_MASKED(nh->ttl, new_ttl, mask);

	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
	nh->ttl = new_ttl;
}

static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv4 *key,
		    const struct ovs_key_ipv4 *mask)
{
	struct iphdr *nh;
	__be32 new_addr;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct iphdr));
	if (unlikely(err))
		return err;

	nh = ip_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (mask->ipv4_src) {
		new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);

		if (unlikely(new_addr != nh->saddr)) {
			set_ip_addr(skb, nh, &nh->saddr, new_addr);
			flow_key->ipv4.addr.src = new_addr;
		}
	}
	if (mask->ipv4_dst) {
		new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);

		if (unlikely(new_addr != nh->daddr)) {
			set_ip_addr(skb, nh, &nh->daddr, new_addr);
			flow_key->ipv4.addr.dst = new_addr;
		}
	}
	if (mask->ipv4_tos) {
		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
		flow_key->ip.tos = nh->tos;
	}
	if (mask->ipv4_ttl) {
		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
		flow_key->ip.ttl = nh->ttl;
	}

	return 0;
}

static bool is_ipv6_mask_nonzero(const __be32 addr[4])
{
	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
}

static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv6 *key,
		    const struct ovs_key_ipv6 *mask)
{
	struct ipv6hdr *nh;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct ipv6hdr));
	if (unlikely(err))
		return err;

	nh = ipv6_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
		__be32 *saddr = (__be32 *)&nh->saddr;
		__be32 masked[4];

		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);

		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
			set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
				      true);
			memcpy(&flow_key->ipv6.addr.src, masked,
			       sizeof(flow_key->ipv6.addr.src));
		}
	}
	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
		unsigned int offset = 0;
		int flags = IP6_FH_F_SKIP_RH;
		bool recalc_csum = true;
		__be32 *daddr = (__be32 *)&nh->daddr;
		__be32 masked[4];

		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);

		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
			if (ipv6_ext_hdr(nh->nexthdr))
				recalc_csum = (ipv6_find_hdr(skb, &offset,
							     NEXTHDR_ROUTING,
							     NULL, &flags)
					       != NEXTHDR_ROUTING);

			set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
				      recalc_csum);
			memcpy(&flow_key->ipv6.addr.dst, masked,
			       sizeof(flow_key->ipv6.addr.dst));
		}
	}
	if (mask->ipv6_tclass) {
		ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
		flow_key->ip.tos = ipv6_get_dsfield(nh);
	}
	if (mask->ipv6_label) {
		set_ipv6_fl(nh, ntohl(key->ipv6_label),
			    ntohl(mask->ipv6_label));
		flow_key->ipv6.label =
		    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
	}
	if (mask->ipv6_hlimit) {
		OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
			       mask->ipv6_hlimit);
		flow_key->ip.ttl = nh->hop_limit;
	}
	return 0;
}

/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
			__be16 new_port, __sum16 *check)
{
	inet_proto_csum_replace2(check, skb, *port, new_port, false);
	*port = new_port;
}

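/* Rewrite the UDP ports under mask.  When the datagram carries no
 * checksum (and none will be computed later via CHECKSUM_PARTIAL), the
 * new ports can be stored directly without any checksum fixup.
 */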
static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_udp *key,
		   const struct ovs_key_udp *mask)
{
	struct udphdr *uh;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct udphdr));
	if (unlikely(err))
		return err;

	uh = udp_hdr(skb);
	/* At least one of the masks is non-zero, so do not bother checking
	 * them individually.
	 */
	src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src);
	dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst);

	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
		if (likely(src != uh->source)) {
			set_tp_port(skb, &uh->source, src, &uh->check);
			flow_key->tp.src = src;
		}
		if (likely(dst != uh->dest)) {
			set_tp_port(skb, &uh->dest, dst, &uh->check);
			flow_key->tp.dst = dst;
		}

		if (unlikely(!uh->check))
			uh->check = CSUM_MANGLED_0;
	} else {
		uh->source = src;
		uh->dest = dst;
		flow_key->tp.src = src;
		flow_key->tp.dst = dst;
	}

	skb_clear_hash(skb);

	return 0;
}

static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_tcp *key,
		   const struct ovs_key_tcp *mask)
{
	struct tcphdr *th;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct tcphdr));
	if (unlikely(err))
		return err;

	th = tcp_hdr(skb);
	src = OVS_MASKED(th->source, key->tcp_src, mask->tcp_src);
	if (likely(src != th->source)) {
		set_tp_port(skb, &th->source, src, &th->check);
		flow_key->tp.src = src;
	}
	dst = OVS_MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
	if (likely(dst != th->dest)) {
		set_tp_port(skb, &th->dest, dst, &th->check);
		flow_key->tp.dst = dst;
	}
	skb_clear_hash(skb);

	return 0;
}

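/* SCTP uses a CRC32c checksum over the whole packet, which cannot be
 * updated incrementally like the ones'-complement TCP/UDP checksums.
 * set_sctp() therefore recomputes it, while the old ^ correct ^ new
 * combination below preserves any pre-existing checksum error.
 */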
static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_sctp *key,
		    const struct ovs_key_sctp *mask)
{
	unsigned int sctphoff = skb_transport_offset(skb);
	struct sctphdr *sh;
	__le32 old_correct_csum, new_csum, old_csum;
	int err;

	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
	if (unlikely(err))
		return err;

	sh = sctp_hdr(skb);
	old_csum = sh->checksum;
	old_correct_csum = sctp_compute_cksum(skb, sctphoff);

	sh->source = OVS_MASKED(sh->source, key->sctp_src, mask->sctp_src);
	sh->dest = OVS_MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);

	new_csum = sctp_compute_cksum(skb, sctphoff);

	/* Carry any checksum errors through. */
	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

	skb_clear_hash(skb);
	flow_key->tp.src = sh->source;
	flow_key->tp.dst = sh->dest;

	return 0;
}

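/* Output callback used while fragmenting: restores the L2 state saved in
 * ovs_frag_data by prepare_frag() onto each fragment before handing it to
 * the vport.
 */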
static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
	struct vport *vport = data->vport;

	if (skb_cow_head(skb, data->l2_len) < 0) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	__skb_dst_copy(skb, data->dst);
	*OVS_CB(skb) = data->cb;
	skb->inner_protocol = data->inner_protocol;
	skb->vlan_tci = data->vlan_tci;
	skb->vlan_proto = data->vlan_proto;

	/* Reconstruct the MAC header.  */
	skb_push(skb, data->l2_len);
	memcpy(skb->data, &data->l2_data, data->l2_len);
	skb_postpush_rcsum(skb, skb->data, data->l2_len);
	skb_reset_mac_header(skb);

	if (eth_p_mpls(skb->protocol)) {
		skb->inner_network_header = skb->network_header;
		skb_set_network_header(skb, data->network_offset);
		skb_reset_mac_len(skb);
	}

	ovs_vport_send(vport, skb, data->mac_proto);
	return 0;
}

static unsigned int
ovs_dst_get_mtu(const struct dst_entry *dst)
{
	return dst->dev->mtu;
}

static struct dst_ops ovs_dst_ops = {
	.family = AF_UNSPEC,
	.mtu = ovs_dst_get_mtu,
};

/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
 * ovs_vport_output(), which is called once per fragmented packet.
 */
static void prepare_frag(struct vport *vport, struct sk_buff *skb,
			 u16 orig_network_offset, u8 mac_proto)
{
	unsigned int hlen = skb_network_offset(skb);
	struct ovs_frag_data *data;

	data = this_cpu_ptr(&ovs_frag_data_storage);
	data->dst = skb->_skb_refdst;
	data->vport = vport;
	data->cb = *OVS_CB(skb);
	data->inner_protocol = skb->inner_protocol;
	data->network_offset = orig_network_offset;
	data->vlan_tci = skb->vlan_tci;
	data->vlan_proto = skb->vlan_proto;
	data->mac_proto = mac_proto;
	data->l2_len = hlen;
	memcpy(&data->l2_data, skb->data, hlen);

	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
	skb_pull(skb, hlen);
}

static void ovs_fragment(struct net *net, struct vport *vport,
			 struct sk_buff *skb, u16 mru,
			 struct sw_flow_key *key)
{
	u16 orig_network_offset = 0;

	if (eth_p_mpls(skb->protocol)) {
		orig_network_offset = skb_network_offset(skb);
		skb->network_header = skb->inner_network_header;
	}

	if (skb_network_offset(skb) > MAX_L2_LEN) {
		OVS_NLERR(1, "L2 header too long to fragment");
		goto err;
	}

	if (key->eth.type == htons(ETH_P_IP)) {
		struct dst_entry ovs_dst;
		unsigned long orig_dst;

		prepare_frag(vport, skb, orig_network_offset,
			     ovs_key_mac_proto(key));
		dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
		ovs_dst.dev = vport->dev;

		orig_dst = skb->_skb_refdst;
		skb_dst_set_noref(skb, &ovs_dst);
		IPCB(skb)->frag_max_size = mru;

		ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
		refdst_drop(orig_dst);
	} else if (key->eth.type == htons(ETH_P_IPV6)) {
		const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
		unsigned long orig_dst;
		struct rt6_info ovs_rt;

		if (!v6ops)
			goto err;

		prepare_frag(vport, skb, orig_network_offset,
			     ovs_key_mac_proto(key));
		memset(&ovs_rt, 0, sizeof(ovs_rt));
		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
		ovs_rt.dst.dev = vport->dev;

		orig_dst = skb->_skb_refdst;
		skb_dst_set_noref(skb, &ovs_rt.dst);
		IP6CB(skb)->frag_max_size = mru;

		v6ops->fragment(net, skb->sk, skb, ovs_vport_output);
		refdst_drop(orig_dst);
	} else {
		WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
			  ovs_vport_name(vport), ntohs(key->eth.type), mru,
			  vport->dev->mtu);
		goto err;
	}

	return;
err:
	kfree_skb(skb);
}

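/* Deliver 'skb' on 'out_port', applying any pending truncation (cutlen)
 * and fragmenting when the packet exceeds the flow's maximum received
 * unit (mru) but the fragments still fit the device MTU.
 */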
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
		      struct sw_flow_key *key)
{
	struct vport *vport = ovs_vport_rcu(dp, out_port);

	if (likely(vport)) {
		u16 mru = OVS_CB(skb)->mru;
		u32 cutlen = OVS_CB(skb)->cutlen;

		if (unlikely(cutlen > 0)) {
			if (skb->len - cutlen > ovs_mac_header_len(key))
				pskb_trim(skb, skb->len - cutlen);
			else
				pskb_trim(skb, ovs_mac_header_len(key));
		}

		if (likely(!mru ||
		           (skb->len <= mru + vport->dev->hard_header_len))) {
			ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
		} else if (mru <= vport->dev->mtu) {
			struct net *net = read_pnet(&dp->net);

			ovs_fragment(net, vport, skb, mru, key);
		} else {
			kfree_skb(skb);
		}
	} else {
		kfree_skb(skb);
	}
}

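/* Send the packet to userspace through an OVS_PACKET_CMD_ACTION upcall,
 * attaching userdata, egress tunnel info and/or the pending action list
 * as requested by the OVS_USERSPACE_ATTR_* attributes.
 */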
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
			    struct sw_flow_key *key, const struct nlattr *attr,
			    const struct nlattr *actions, int actions_len,
			    uint32_t cutlen)
{
	struct dp_upcall_info upcall;
	const struct nlattr *a;
	int rem;

	memset(&upcall, 0, sizeof(upcall));
	upcall.cmd = OVS_PACKET_CMD_ACTION;
	upcall.mru = OVS_CB(skb)->mru;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
		 a = nla_next(a, &rem)) {
		switch (nla_type(a)) {
		case OVS_USERSPACE_ATTR_USERDATA:
			upcall.userdata = a;
			break;

		case OVS_USERSPACE_ATTR_PID:
			upcall.portid = nla_get_u32(a);
			break;

		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
			/* Get out tunnel info. */
			struct vport *vport;

			vport = ovs_vport_rcu(dp, nla_get_u32(a));
			if (vport) {
				int err;

				err = dev_fill_metadata_dst(vport->dev, skb);
				if (!err)
					upcall.egress_tun_info = skb_tunnel_info(skb);
			}

			break;
		}

		case OVS_USERSPACE_ATTR_ACTIONS: {
			/* Include actions. */
			upcall.actions = actions;
			upcall.actions_len = actions_len;
			break;
		}

		} /* End of switch. */
	}

	return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
}

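/* Execute the nested action list of a sample action with the configured
 * probability.  The common case of a single (optionally truncated)
 * userspace action is run inline; any other action list is cloned and
 * deferred.
 */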
static int sample(struct datapath *dp, struct sk_buff *skb,
		  struct sw_flow_key *key, const struct nlattr *attr,
		  const struct nlattr *actions, int actions_len)
{
	const struct nlattr *acts_list = NULL;
	const struct nlattr *a;
	int rem;
	u32 cutlen = 0;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
		 a = nla_next(a, &rem)) {
		u32 probability;

		switch (nla_type(a)) {
		case OVS_SAMPLE_ATTR_PROBABILITY:
			probability = nla_get_u32(a);
			if (!probability || prandom_u32() > probability)
				return 0;
			break;

		case OVS_SAMPLE_ATTR_ACTIONS:
			acts_list = a;
			break;
		}
	}

	rem = nla_len(acts_list);
	a = nla_data(acts_list);

	/* Actions list is empty, do nothing */
	if (unlikely(!rem))
		return 0;

	/* The only known usage of sample action is having a single user-space
	 * action, or having a truncate action followed by a single user-space
	 * action. Treat this usage as a special case.
	 * output_userspace() should clone the skb to be sent to the
	 * user space. This skb will be consumed by its caller.
	 */
	if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) {
		struct ovs_action_trunc *trunc = nla_data(a);

		if (skb->len > trunc->max_len)
			cutlen = skb->len - trunc->max_len;

		a = nla_next(a, &rem);
	}

	if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
		   nla_is_last(a, rem)))
		return output_userspace(dp, skb, key, a, actions,
					actions_len, cutlen);

	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		/* Skip the sample action when out of memory. */
		return 0;

	if (!add_deferred_actions(skb, key, nla_data(acts_list),
				  nla_len(acts_list))) {
		if (net_ratelimit())
			pr_warn("%s: deferred actions limit reached, dropping sample action\n",
				ovs_dp_name(dp));

		kfree_skb(skb);
	}
	return 0;
}

static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
			 const struct nlattr *attr)
{
	struct ovs_action_hash *hash_act = nla_data(attr);
	u32 hash = 0;

	/* OVS_HASH_ALG_L4 is the only possible hash algorithm.  */
	hash = skb_get_hash(skb);
	hash = jhash_1word(hash, hash_act->hash_basis);
	if (!hash)
		hash = 0x1;

	key->ovs_flow_hash = hash;
}

static int execute_set_action(struct sk_buff *skb,
			      struct sw_flow_key *flow_key,
			      const struct nlattr *a)
{
	/* Only tunnel set execution is supported without a mask. */
	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
		struct ovs_tunnel_info *tun = nla_data(a);

		skb_dst_drop(skb);
		dst_hold((struct dst_entry *)tun->tun_dst);
		skb_dst_set(skb, (struct dst_entry *)tun->tun_dst);
		return 0;
	}

	return -EINVAL;
}

/* Mask is at the midpoint of the data. */
#define get_mask(a, type) ((const type)nla_data(a) + 1)

static int execute_masked_set_action(struct sk_buff *skb,
				     struct sw_flow_key *flow_key,
				     const struct nlattr *a)
{
	int err = 0;

	switch (nla_type(a)) {
	case OVS_KEY_ATTR_PRIORITY:
		OVS_SET_MASKED(skb->priority, nla_get_u32(a),
			       *get_mask(a, u32 *));
		flow_key->phy.priority = skb->priority;
		break;

	case OVS_KEY_ATTR_SKB_MARK:
		OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
		flow_key->phy.skb_mark = skb->mark;
		break;

	case OVS_KEY_ATTR_TUNNEL_INFO:
		/* Masked data not supported for tunnel. */
		err = -EINVAL;
		break;

	case OVS_KEY_ATTR_ETHERNET:
		err = set_eth_addr(skb, flow_key, nla_data(a),
				   get_mask(a, struct ovs_key_ethernet *));
		break;

	case OVS_KEY_ATTR_IPV4:
		err = set_ipv4(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv4 *));
		break;

	case OVS_KEY_ATTR_IPV6:
		err = set_ipv6(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv6 *));
		break;

	case OVS_KEY_ATTR_TCP:
		err = set_tcp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_tcp *));
		break;

	case OVS_KEY_ATTR_UDP:
		err = set_udp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_udp *));
		break;

	case OVS_KEY_ATTR_SCTP:
		err = set_sctp(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_sctp *));
		break;

	case OVS_KEY_ATTR_MPLS:
		err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
								    __be32 *));
		break;

	case OVS_KEY_ATTR_CT_STATE:
	case OVS_KEY_ATTR_CT_ZONE:
	case OVS_KEY_ATTR_CT_MARK:
	case OVS_KEY_ATTR_CT_LABELS:
	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
		err = -EINVAL;
		break;
	}

	return err;
}

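/* Re-inject 'skb' into the datapath with a new recirculation id.  Shallow
 * nesting levels reuse a preallocated per-CPU key and re-enter packet
 * processing directly; deeper levels fall back to the deferred-action
 * fifo.
 */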
static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
			  struct sw_flow_key *key,
			  const struct nlattr *a, int rem)
{
	struct deferred_action *da;
	int level;

	if (!is_flow_key_valid(key)) {
		int err;

		err = ovs_flow_key_update(skb, key);
		if (err)
			return err;
	}
	BUG_ON(!is_flow_key_valid(key));

	if (!nla_is_last(a, rem)) {
		/* Recirc action is not the last action
		 * of the action list; we need to clone the skb.
		 */
		skb = skb_clone(skb, GFP_ATOMIC);

		/* Skip the recirc action when out of memory, but
		 * continue on with the rest of the action list.
		 */
		if (!skb)
			return 0;
	}

	level = this_cpu_read(exec_actions_level);
	if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
		struct recirc_keys *rks = this_cpu_ptr(recirc_keys);
		struct sw_flow_key *recirc_key = &rks->key[level - 1];

		*recirc_key = *key;
		recirc_key->recirc_id = nla_get_u32(a);
		ovs_dp_process_packet(skb, recirc_key);

		return 0;
	}

	da = add_deferred_actions(skb, key, NULL, 0);
	if (da) {
		da->pkt_key.recirc_id = nla_get_u32(a);
	} else {
		kfree_skb(skb);

		if (net_ratelimit())
			pr_warn("%s: deferred action limit reached, drop recirc action\n",
				ovs_dp_name(dp));
	}

	return 0;
}

/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len)
{
	const struct nlattr *a;
	int rem;

	for (a = attr, rem = len; rem > 0;
	     a = nla_next(a, &rem)) {
		int err = 0;

		switch (nla_type(a)) {
		case OVS_ACTION_ATTR_OUTPUT: {
			int port = nla_get_u32(a);
			struct sk_buff *clone;

			/* Every output action needs a separate clone
			 * of 'skb'; in case the output action is the
			 * last action, cloning can be avoided.
			 */
			if (nla_is_last(a, rem)) {
				do_output(dp, skb, port, key);
				/* 'skb' has been used for output. */
				return 0;
			}

			clone = skb_clone(skb, GFP_ATOMIC);
			if (clone)
				do_output(dp, clone, port, key);
			OVS_CB(skb)->cutlen = 0;
			break;
		}

		case OVS_ACTION_ATTR_TRUNC: {
			struct ovs_action_trunc *trunc = nla_data(a);

			if (skb->len > trunc->max_len)
				OVS_CB(skb)->cutlen = skb->len - trunc->max_len;
			break;
		}

		case OVS_ACTION_ATTR_USERSPACE:
			output_userspace(dp, skb, key, a, attr,
						     len, OVS_CB(skb)->cutlen);
			OVS_CB(skb)->cutlen = 0;
			break;

		case OVS_ACTION_ATTR_HASH:
			execute_hash(skb, key, a);
			break;

		case OVS_ACTION_ATTR_PUSH_MPLS:
			err = push_mpls(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_MPLS:
			err = pop_mpls(skb, key, nla_get_be16(a));
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			err = push_vlan(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_VLAN:
			err = pop_vlan(skb, key);
			break;

		case OVS_ACTION_ATTR_RECIRC:
			err = execute_recirc(dp, skb, key, a, rem);
			if (nla_is_last(a, rem)) {
				/* If this is the last action, the skb has
				 * been consumed or freed.
				 * Return immediately.
				 */
				return err;
			}
			break;

		case OVS_ACTION_ATTR_SET:
			err = execute_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SET_MASKED:
		case OVS_ACTION_ATTR_SET_TO_MASKED:
			err = execute_masked_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SAMPLE:
			err = sample(dp, skb, key, a, attr, len);
			break;

		case OVS_ACTION_ATTR_CT:
			if (!is_flow_key_valid(key)) {
				err = ovs_flow_key_update(skb, key);
				if (err)
					return err;
			}

			err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
					     nla_data(a));

			/* Hide stolen IP fragments from user space. */
			if (err)
				return err == -EINPROGRESS ? 0 : err;
			break;

		case OVS_ACTION_ATTR_PUSH_ETH:
			err = push_eth(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_ETH:
			err = pop_eth(skb, key);
			break;
		}

		if (unlikely(err)) {
			kfree_skb(skb);
			return err;
		}
	}

	consume_skb(skb);
	return 0;
}

static void process_deferred_actions(struct datapath *dp)
{
	struct action_fifo *fifo = this_cpu_ptr(action_fifos);

	/* Do not touch the FIFO if there are no deferred actions. */
	if (action_fifo_is_empty(fifo))
		return;

	/* Finish executing all deferred actions. */
	do {
		struct deferred_action *da = action_fifo_get(fifo);
		struct sk_buff *skb = da->skb;
		struct sw_flow_key *key = &da->pkt_key;
		const struct nlattr *actions = da->actions;
		int actions_len = da->actions_len;

		if (actions)
			do_execute_actions(dp, skb, key, actions, actions_len);
		else
			ovs_dp_process_packet(skb, key);
	} while (!action_fifo_is_empty(fifo));

	/* Reset FIFO for the next packet.  */
	action_fifo_init(fifo);
}

/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
			const struct sw_flow_actions *acts,
			struct sw_flow_key *key)
{
	int err, level;

	level = __this_cpu_inc_return(exec_actions_level);
	if (unlikely(level > OVS_RECURSION_LIMIT)) {
		net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
				     ovs_dp_name(dp));
		kfree_skb(skb);
		err = -ENETDOWN;
		goto out;
	}

	err = do_execute_actions(dp, skb, key,
				 acts->actions, acts->actions_len);

	if (level == 1)
		process_deferred_actions(dp);

out:
	__this_cpu_dec(exec_actions_level);
	return err;
}

int action_fifos_init(void)
{
	action_fifos = alloc_percpu(struct action_fifo);
	if (!action_fifos)
		return -ENOMEM;

	recirc_keys = alloc_percpu(struct recirc_keys);
	if (!recirc_keys) {
		free_percpu(action_fifos);
		return -ENOMEM;
	}

	return 0;
}

void action_fifos_exit(void)
{
	free_percpu(action_fifos);
	free_percpu(recirc_keys);
}