/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
51 52
#include <net/net_namespace.h>
#include <net/netns/generic.h>
53 54 55

#include "datapath.h"
#include "flow.h"
56
#include "flow_table.h"
57
#include "flow_netlink.h"
A
Andy Zhou 已提交
58
#include "meter.h"
59
#include "vport-internal_dev.h"
60
#include "vport-netdev.h"
61

62
/* Identifier with external linkage, tagged __read_mostly.
 * NOTE(review): presumably the per-network-namespace id registered for this
 * module -- confirm against the code that assigns it.
 */
unsigned int ovs_net_id __read_mostly;
63

64 65 66 67
/* Forward declarations: these Generic Netlink families are referenced by
 * functions that appear before their definitions in this file.
 */
static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

68 69
/* Forward declaration of the flow attribute policy table; the definition is
 * not in this part of the file.
 */
static const struct nla_policy flow_policy[];

70 71
static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP,
72 73
};

74 75
static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP,
76 77
};

78 79
static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP,
80 81
};

82 83
/* Check if need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
84 85
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
			    unsigned int group)
86 87
{
	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
88
	       genl_has_listeners(family, genl_info_net(info), group);
89 90
}

91
static void ovs_notify(struct genl_family *family,
92
		       struct sk_buff *skb, struct genl_info *info)
93
{
J
Jiri Benc 已提交
94
	genl_notify(family, skb, info, 0, GFP_KERNEL);
95 96
}

97 98 99
/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.), are
 * protected by ovs_mutex, which is taken with ovs_lock().
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
/* The mutex behind ovs_lock()/ovs_unlock(). */
static DEFINE_MUTEX(ovs_mutex);

/* Acquire ovs_mutex. */
void ovs_lock(void)
{
	mutex_lock(&ovs_mutex);
}

/* Release ovs_mutex. */
void ovs_unlock(void)
{
	mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
/* Report whether ovs_mutex is held, as seen by lockdep.
 *
 * While 'debug_locks' is set the answer comes from lockdep_is_held();
 * otherwise 1 is returned unconditionally.
 */
int lockdep_ovsl_is_held(void)
{
	return debug_locks ? lockdep_is_held(&ovs_mutex) : 1;
}
#endif

136
/* Forward declarations for helpers defined later in this file. */
static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
			     const struct sw_flow_key *,
			     const struct dp_upcall_info *,
			     uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
				  const struct sw_flow_key *,
				  const struct dp_upcall_info *,
				  uint32_t cutlen);
145

146
/* Must be called with rcu_read_lock or ovs_mutex. */
147
const char *ovs_dp_name(const struct datapath *dp)
148
{
149
	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
150
	return ovs_vport_name(vport);
151 152
}

153
static int get_dpifindex(const struct datapath *dp)
154 155 156 157 158 159
{
	struct vport *local;
	int ifindex;

	rcu_read_lock();

160
	local = ovs_vport_rcu(dp, OVSP_LOCAL);
161
	if (local)
162
		ifindex = local->dev->ifindex;
163 164 165 166 167 168 169 170 171 172 173 174
	else
		ifindex = 0;

	rcu_read_unlock();

	return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

175
	ovs_flow_tbl_destroy(&dp->table);
176
	free_percpu(dp->stats_percpu);
177
	kfree(dp->ports);
A
Andy Zhou 已提交
178
	ovs_meters_exit(dp);
179 180 181
	kfree(dp);
}

182 183 184 185 186 187
/* Return the bucket of dp->ports that 'port_no' maps to.  The bucket index
 * is 'port_no' masked with (DP_VPORT_HASH_BUCKETS - 1).
 */
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	unsigned int bucket = port_no & (DP_VPORT_HASH_BUCKETS - 1);

	return &dp->ports[bucket];
}

J
Jarno Rajahalme 已提交
188
/* Called with ovs_mutex or RCU read lock. */
189 190 191 192 193 194
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
195
	hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
196 197 198 199 200 201
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}

202
/* Called with ovs_mutex. */
203 204 205 206 207 208 209
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = ovs_vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;
210
		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
211

212
		hlist_add_head_rcu(&vport->dp_hash_node, head);
213 214 215 216 217 218
	}
	return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
219
	ASSERT_OVSL();
220 221

	/* First drop references to device. */
222
	hlist_del_rcu(&p->dp_hash_node);
223 224 225 226 227 228

	/* Then destroy it. */
	ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
229
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
230
{
231
	const struct vport *p = OVS_CB(skb)->input_vport;
232 233
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
234
	struct sw_flow_actions *sf_acts;
235 236
	struct dp_stats_percpu *stats;
	u64 *stats_counter;
237
	u32 n_mask_hit;
238

239
	stats = this_cpu_ptr(dp->stats_percpu);
240 241

	/* Look up flow. */
242
	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
243 244
	if (unlikely(!flow)) {
		struct dp_upcall_info upcall;
245
		int error;
246

247
		memset(&upcall, 0, sizeof(upcall));
248
		upcall.cmd = OVS_PACKET_CMD_MISS;
249
		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
J
Joe Stringer 已提交
250
		upcall.mru = OVS_CB(skb)->mru;
251
		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
252 253 254 255
		if (unlikely(error))
			kfree_skb(skb);
		else
			consume_skb(skb);
256 257 258 259
		stats_counter = &stats->n_missed;
		goto out;
	}

260 261 262
	ovs_flow_stats_update(flow, key->tp.flags, skb);
	sf_acts = rcu_dereference(flow->sf_acts);
	ovs_execute_actions(dp, skb, sf_acts, key);
263

264
	stats_counter = &stats->n_hit;
265 266 267

out:
	/* Update datapath statistics. */
268
	u64_stats_update_begin(&stats->syncp);
269
	(*stats_counter)++;
270
	stats->n_mask_hit += n_mask_hit;
271
	u64_stats_update_end(&stats->syncp);
272 273 274
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
275
		  const struct sw_flow_key *key,
276 277
		  const struct dp_upcall_info *upcall_info,
		  uint32_t cutlen)
278 279 280 281
{
	struct dp_stats_percpu *stats;
	int err;

282
	if (upcall_info->portid == 0) {
283 284 285 286 287
		err = -ENOTCONN;
		goto err;
	}

	if (!skb_is_gso(skb))
288
		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
289
	else
290
		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
291 292 293 294 295 296
	if (err)
		goto err;

	return 0;

err:
297
	stats = this_cpu_ptr(dp->stats_percpu);
298

299
	u64_stats_update_begin(&stats->syncp);
300
	stats->n_lost++;
301
	u64_stats_update_end(&stats->syncp);
302 303 304 305

	return err;
}

306
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
307
			     const struct sw_flow_key *key,
308 309
			     const struct dp_upcall_info *upcall_info,
				 uint32_t cutlen)
310
{
311
	unsigned int gso_type = skb_shinfo(skb)->gso_type;
312
	struct sw_flow_key later_key;
313 314 315
	struct sk_buff *segs, *nskb;
	int err;

316
	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
317
	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
318 319
	if (IS_ERR(segs))
		return PTR_ERR(segs);
320 321
	if (segs == NULL)
		return -EINVAL;
322

323 324 325 326 327 328 329 330 331
	if (gso_type & SKB_GSO_UDP) {
		/* The initial flow key extracted by ovs_flow_key_extract()
		 * in this case is for a first fragment, so we need to
		 * properly mark later fragments.
		 */
		later_key = *key;
		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
	}

332 333 334
	/* Queue all of the segments. */
	skb = segs;
	do {
335 336 337
		if (gso_type & SKB_GSO_UDP && skb != segs)
			key = &later_key;

338
		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
		if (err)
			break;

	} while ((skb = skb->next));

	/* Free all of the segments. */
	skb = segs;
	do {
		nskb = skb->next;
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	} while ((skb = nskb));
	return err;
}

356
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
357
			      unsigned int hdrlen, int actions_attrlen)
358 359
{
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
360
		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
361 362
		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
		+ nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */
363 364

	/* OVS_PACKET_ATTR_USERDATA */
365 366 367 368 369 370
	if (upcall_info->userdata)
		size += NLA_ALIGN(upcall_info->userdata->nla_len);

	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
	if (upcall_info->egress_tun_info)
		size += nla_total_size(ovs_tun_key_attr_size());
371

372 373
	/* OVS_PACKET_ATTR_ACTIONS */
	if (upcall_info->actions_len)
374
		size += nla_total_size(actions_attrlen);
375

J
Joe Stringer 已提交
376 377 378 379
	/* OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru)
		size += nla_total_size(sizeof(upcall_info->mru));

380 381 382
	return size;
}

J
Joe Stringer 已提交
383 384 385 386 387 388
/* Zero-pad 'skb' up to the next NLA_ALIGN boundary.
 *
 * Nothing is done when the datapath has OVS_DP_F_UNALIGNED set in
 * user_features, or when skb->len is already aligned.
 */
static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
		size_t plen = NLA_ALIGN(skb->len) - skb->len;

		if (plen > 0)
			skb_put_zero(skb, plen);
	}
}

393
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
394
				  const struct sw_flow_key *key,
395 396
				  const struct dp_upcall_info *upcall_info,
				  uint32_t cutlen)
397 398 399
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
L
Li RongQing 已提交
400
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
401
	struct nlattr *nla;
402
	size_t len;
403
	unsigned int hlen;
404 405 406 407 408
	int err, dp_ifindex;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;
409

410
	if (skb_vlan_tag_present(skb)) {
411 412 413 414
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

415
		nskb = __vlan_hwaccel_push_inside(nskb);
416
		if (!nskb)
417 418 419 420 421 422 423 424 425 426
			return -ENOMEM;

		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

427 428
	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
429
	    (err = skb_csum_hwoffload_help(skb, 0)))
430 431 432 433 434 435 436 437 438 439 440
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
	if (dp->user_features & OVS_DP_F_UNALIGNED)
		hlen = skb_zerocopy_headlen(skb);
	else
		hlen = skb->len;

441 442
	len = upcall_msg_size(upcall_info, hlen - cutlen,
			      OVS_CB(skb)->acts_origlen);
443
	user_skb = genlmsg_new(len, GFP_ATOMIC);
444 445 446 447 448 449 450 451 452
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	upcall->dp_ifindex = dp_ifindex;

453
	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
454
	BUG_ON(err);
455 456

	if (upcall_info->userdata)
457 458 459
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));
460

461 462
	if (upcall_info->egress_tun_info) {
		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
463 464
		err = ovs_nla_put_tunnel_info(user_skb,
					      upcall_info->egress_tun_info);
465 466 467 468
		BUG_ON(err);
		nla_nest_end(user_skb, nla);
	}

469 470 471 472 473 474 475 476 477 478 479
	if (upcall_info->actions_len) {
		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
		err = ovs_nla_put_actions(upcall_info->actions,
					  upcall_info->actions_len,
					  user_skb);
		if (!err)
			nla_nest_end(user_skb, nla);
		else
			nla_nest_cancel(user_skb, nla);
	}

J
Joe Stringer 已提交
480 481 482 483 484 485 486 487 488 489
	/* Add OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru) {
		if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
				upcall_info->mru)) {
			err = -ENOBUFS;
			goto out;
		}
		pad_packet(dp, user_skb);
	}

490 491 492 493 494 495 496 497 498 499
	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
	if (cutlen > 0) {
		if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
				skb->len)) {
			err = -ENOBUFS;
			goto out;
		}
		pad_packet(dp, user_skb);
	}

500 501 502 503 504 505
	/* Only reserve room for attribute header, packet data is added
	 * in skb_zerocopy() */
	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
		err = -ENOBUFS;
		goto out;
	}
506
	nla->nla_len = nla_attr_size(skb->len - cutlen);
507

508
	err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
509 510
	if (err)
		goto out;
511

512
	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
J
Joe Stringer 已提交
513
	pad_packet(dp, user_skb);
514

515
	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
516

517
	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
L
Li RongQing 已提交
518
	user_skb = NULL;
519
out:
520 521
	if (err)
		skb_tx_error(skb);
L
Li RongQing 已提交
522
	kfree_skb(user_skb);
523 524 525 526 527 528 529
	kfree_skb(nskb);
	return err;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
J
Joe Stringer 已提交
530
	struct net *net = sock_net(skb->sk);
531 532 533 534
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
535
	struct sw_flow_actions *sf_acts;
536
	struct datapath *dp;
537
	struct vport *input_vport;
J
Joe Stringer 已提交
538
	u16 mru = 0;
539 540
	int len;
	int err;
541
	bool log = !a[OVS_PACKET_ATTR_PROBE];
542 543 544

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
545
	    !a[OVS_PACKET_ATTR_ACTIONS])
546 547 548 549 550 551 552 553 554
		goto err;

	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

555
	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
556

J
Joe Stringer 已提交
557 558 559 560 561 562 563
	/* Set packet's mru */
	if (a[OVS_PACKET_ATTR_MRU]) {
		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
		packet->ignore_df = 1;
	}
	OVS_CB(packet)->mru = mru;

564
	/* Build an sw_flow for sending this packet. */
565
	flow = ovs_flow_alloc();
566 567 568 569
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

570 571
	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
					     packet, &flow->key, log);
572 573 574
	if (err)
		goto err_flow_free;

J
Joe Stringer 已提交
575
	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
576
				   &flow->key, &acts, log);
577 578
	if (err)
		goto err_flow_free;
579

580
	rcu_assign_pointer(flow->sf_acts, acts);
581
	packet->priority = flow->key.phy.priority;
582
	packet->mark = flow->key.phy.skb_mark;
583 584

	rcu_read_lock();
J
Joe Stringer 已提交
585
	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
586 587 588 589
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

590 591 592 593 594 595 596
	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
	if (!input_vport)
		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

	if (!input_vport)
		goto err_unlock;

J
Joe Stringer 已提交
597
	packet->dev = input_vport->dev;
598
	OVS_CB(packet)->input_vport = input_vport;
599
	sf_acts = rcu_dereference(flow->sf_acts);
600

601
	local_bh_disable();
602
	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
603 604 605
	local_bh_enable();
	rcu_read_unlock();

606
	ovs_flow_free(flow, false);
607 608 609 610 611
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_free:
612
	ovs_flow_free(flow, false);
613 614 615 616 617 618 619
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
620
	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
621 622
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
623
	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
J
Joe Stringer 已提交
624
	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
625 626
};

627
static const struct genl_ops dp_packet_genl_ops[] = {
628
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
629
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
630 631 632 633 634
	  .policy = packet_policy,
	  .doit = ovs_packet_cmd_execute
	}
};

635
/* Generic Netlink family for packet commands.  Each message carries a
 * struct ovs_header as its family-specific header (.hdrsize).
 */
static struct genl_family dp_packet_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	.netnsok = true,
	.parallel_ops = true,
	.ops = dp_packet_genl_ops,
	.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
	.module = THIS_MODULE,
};

647
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
648
			 struct ovs_dp_megaflow_stats *mega_stats)
649 650 651
{
	int i;

652 653
	memset(mega_stats, 0, sizeof(*mega_stats));

654
	stats->n_flows = ovs_flow_tbl_count(&dp->table);
655
	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
656 657

	stats->n_hit = stats->n_missed = stats->n_lost = 0;
658

659 660 661 662 663 664 665 666
	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		do {
667
			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
668
			local_stats = *percpu_stats;
669
		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
670 671 672 673

		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
674
		mega_stats->n_mask_hit += local_stats.n_mask_hit;
675 676 677
	}
}

678 679 680 681 682 683 684 685 686 687 688 689
/* True when the flow is identified by a UFID and the caller did not set
 * OVS_UFID_F_OMIT_KEY in 'ufid_flags'.
 */
static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
	if (!ovs_identifier_is_ufid(sfid))
		return false;

	return (ufid_flags & OVS_UFID_F_OMIT_KEY) == 0;
}

/* True unless OVS_UFID_F_OMIT_MASK is set in 'ufid_flags'. */
static bool should_fill_mask(uint32_t ufid_flags)
{
	return (ufid_flags & OVS_UFID_F_OMIT_MASK) == 0;
}

static bool should_fill_actions(uint32_t ufid_flags)
690
{
691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713
	return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
				    const struct sw_flow_id *sfid,
				    uint32_t ufid_flags)
{
	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

	/* OVS_FLOW_ATTR_UFID */
	if (sfid && ovs_identifier_is_ufid(sfid))
		len += nla_total_size(sfid->ufid_len);

	/* OVS_FLOW_ATTR_KEY */
	if (!sfid || should_fill_key(sfid, ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_MASK */
	if (should_fill_mask(ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_ACTIONS */
	if (should_fill_actions(ufid_flags))
714
		len += nla_total_size(acts->orig_len);
715 716

	return len
717
		+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
718
		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
719
		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
720 721
}

722 723 724 725 726 727 728
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
				   struct sk_buff *skb)
{
	struct ovs_flow_stats stats;
	__be16 tcp_flags;
	unsigned long used;
729

730
	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
731

732
	if (used &&
N
Nicolas Dichtel 已提交
733 734
	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
			      OVS_FLOW_ATTR_PAD))
735
		return -EMSGSIZE;
736

737
	if (stats.n_packets &&
738 739 740
	    nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
			  sizeof(struct ovs_flow_stats), &stats,
			  OVS_FLOW_ATTR_PAD))
741
		return -EMSGSIZE;
742

743 744
	if ((u8)ntohs(tcp_flags) &&
	     nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
745 746 747 748 749 750 751 752 753 754 755
		return -EMSGSIZE;

	return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
				     struct sk_buff *skb, int skb_orig_len)
{
	struct nlattr *start;
	int err;
756 757 758 759 760 761 762 763 764 765 766

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'.  This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them.  (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
767 768
	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
	if (start) {
769 770
		const struct sw_flow_actions *sf_acts;

771
		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
772 773
		err = ovs_nla_put_actions(sf_acts->actions,
					  sf_acts->actions_len, skb);
774

775 776 777 778
		if (!err)
			nla_nest_end(skb, start);
		else {
			if (skb_orig_len)
779
				return err;
780 781 782

			nla_nest_cancel(skb, start);
		}
783 784 785 786 787 788 789 790 791 792
	} else if (skb_orig_len) {
		return -EMSGSIZE;
	}

	return 0;
}

/* Write one complete flow message for 'flow' into 'skb'.
 *
 * The message is a Generic Netlink header for dp_flow_genl_family with
 * 'cmd', an ovs_header carrying 'dp_ifindex', the flow identifier, then
 * (subject to 'ufid_flags') the masked key, the mask, the statistics and
 * the actions.
 *
 * Returns 0 on success.  Returns -EMSGSIZE if the header does not fit, or
 * the error of the first attribute helper that fails; in that case the
 * partially built message is cancelled.
 *
 * Called with ovs_mutex or RCU read lock.
 */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
				  struct sk_buff *skb, u32 portid,
				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
	const int skb_orig_len = skb->len;
	struct ovs_header *ovs_header;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_identifier(flow, skb);
	if (err)
		goto error;

	if (should_fill_key(&flow->id, ufid_flags)) {
		err = ovs_nla_put_masked_key(flow, skb);
		if (err)
			goto error;
	}

	if (should_fill_mask(ufid_flags)) {
		err = ovs_nla_put_mask(flow, skb);
		if (err)
			goto error;
	}

	err = ovs_flow_cmd_fill_stats(flow, skb);
	if (err)
		goto error;

	if (should_fill_actions(ufid_flags)) {
		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
		if (err)
			goto error;
	}

	genlmsg_end(skb, ovs_header);
	return 0;

error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

840 841
/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
842
					       const struct sw_flow_id *sfid,
843
					       struct genl_info *info,
844 845
					       bool always,
					       uint32_t ufid_flags)
846
{
847
	struct sk_buff *skb;
848
	size_t len;
849

850
	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
851 852
		return NULL;

853
	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
854
	skb = genlmsg_new(len, GFP_KERNEL);
855 856 857 858
	if (!skb)
		return ERR_PTR(-ENOMEM);

	return skb;
859 860
}

861 862 863 864
/* Allocate and fill a reply message describing 'flow'.
 *
 * Returns NULL or an ERR_PTR value straight from
 * ovs_flow_cmd_alloc_info() when no skb was produced.  Otherwise returns
 * the filled skb.  A failure to fill the freshly sized skb triggers
 * BUG_ON().
 *
 * Called with ovs_mutex.
 */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
					       int dp_ifindex,
					       struct genl_info *info, u8 cmd,
					       bool always, u32 ufid_flags)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
				      &flow->id, info, always, ufid_flags);
	if (IS_ERR_OR_NULL(skb))
		return skb;

	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
					info->snd_portid, info->snd_seq, 0,
					cmd, ufid_flags);
	BUG_ON(retval < 0);
	return skb;
}

882
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
883
{
J
Joe Stringer 已提交
884
	struct net *net = sock_net(skb->sk);
885 886
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
887
	struct sw_flow *flow = NULL, *new_flow;
888
	struct sw_flow_mask mask;
889 890
	struct sk_buff *reply;
	struct datapath *dp;
891
	struct sw_flow_actions *acts;
892
	struct sw_flow_match match;
893
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
894
	int error;
895
	bool log = !a[OVS_FLOW_ATTR_PROBE];
896

897
	/* Must have key and actions. */
898
	error = -EINVAL;
899
	if (!a[OVS_FLOW_ATTR_KEY]) {
900
		OVS_NLERR(log, "Flow key attr not present in new flow.");
901
		goto error;
902 903
	}
	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
904
		OVS_NLERR(log, "Flow actions attr not present in new flow.");
905
		goto error;
906
	}
907

908 909 910 911 912 913 914 915 916 917
	/* Most of the time we need to allocate a new flow, do it before
	 * locking.
	 */
	new_flow = ovs_flow_alloc();
	if (IS_ERR(new_flow)) {
		error = PTR_ERR(new_flow);
		goto error;
	}

	/* Extract key. */
918
	ovs_match_init(&match, &new_flow->key, false, &mask);
919
	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
920
				  a[OVS_FLOW_ATTR_MASK], log);
921
	if (error)
922
		goto err_kfree_flow;
923

924 925
	/* Extract flow identifier. */
	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
926
				       &new_flow->key, log);
927 928
	if (error)
		goto err_kfree_flow;
929

930 931 932 933 934 935
	/* unmasked key is needed to match when ufid is not used. */
	if (ovs_identifier_is_key(&new_flow->id))
		match.key = new_flow->id.unmasked_key;

	ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);

936
	/* Validate actions. */
J
Joe Stringer 已提交
937 938
	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
				     &new_flow->key, &acts, log);
939
	if (error) {
940
		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
941
		goto err_kfree_flow;
942 943
	}

944 945
	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
					ufid_flags);
946 947 948
	if (IS_ERR(reply)) {
		error = PTR_ERR(reply);
		goto err_kfree_acts;
949 950
	}

951
	ovs_lock();
J
Joe Stringer 已提交
952
	dp = get_dp(net, ovs_header->dp_ifindex);
953 954
	if (unlikely(!dp)) {
		error = -ENODEV;
955
		goto err_unlock_ovs;
956
	}
957

958
	/* Check if this is a duplicate flow */
959 960 961
	if (ovs_identifier_is_ufid(&new_flow->id))
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
	if (!flow)
962
		flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
963 964
	if (likely(!flow)) {
		rcu_assign_pointer(new_flow->sf_acts, acts);
965 966

		/* Put flow in bucket. */
967 968
		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
		if (unlikely(error)) {
969
			acts = NULL;
970 971 972 973 974 975 976 977
			goto err_unlock_ovs;
		}

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(new_flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
978 979
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
980
			BUG_ON(error < 0);
981
		}
982
		ovs_unlock();
983
	} else {
984 985
		struct sw_flow_actions *old_acts;

986 987 988 989 990 991
		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request.  We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
992 993 994
		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
							 | NLM_F_EXCL))) {
			error = -EEXIST;
995
			goto err_unlock_ovs;
996
		}
997 998 999 1000 1001 1002 1003 1004 1005
		/* The flow identifier has to be the same for flow updates.
		 * Look for any overlapping flow.
		 */
		if (unlikely(!ovs_flow_cmp(flow, &match))) {
			if (ovs_identifier_is_key(&flow->id))
				flow = ovs_flow_tbl_lookup_exact(&dp->table,
								 &match);
			else /* UFID matches but key is different */
				flow = NULL;
1006 1007 1008 1009
			if (!flow) {
				error = -ENOENT;
				goto err_unlock_ovs;
			}
1010
		}
1011 1012 1013 1014
		/* Update actions. */
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

1015 1016 1017 1018 1019
		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
1020 1021
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
1022 1023 1024
			BUG_ON(error < 0);
		}
		ovs_unlock();
1025

1026
		ovs_nla_free_flow_actions_rcu(old_acts);
1027
		ovs_flow_free(new_flow, false);
1028
	}
1029 1030 1031

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
1032 1033 1034 1035
	return 0;

err_unlock_ovs:
	ovs_unlock();
1036 1037
	kfree_skb(reply);
err_kfree_acts:
1038
	ovs_nla_free_flow_actions(acts);
1039 1040
err_kfree_flow:
	ovs_flow_free(new_flow, false);
1041 1042 1043
error:
	return error;
}
1044

1045
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
J
Joe Stringer 已提交
1046 1047
static struct sw_flow_actions *get_flow_actions(struct net *net,
						const struct nlattr *a,
1048
						const struct sw_flow_key *key,
1049 1050
						const struct sw_flow_mask *mask,
						bool log)
1051 1052 1053 1054 1055
{
	struct sw_flow_actions *acts;
	struct sw_flow_key masked_key;
	int error;

1056
	ovs_flow_mask_key(&masked_key, key, true, mask);
J
Joe Stringer 已提交
1057
	error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
1058
	if (error) {
1059 1060
		OVS_NLERR(log,
			  "Actions may not be safe on all matching packets");
1061 1062 1063 1064 1065 1066
		return ERR_PTR(error);
	}

	return acts;
}

1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101
/* Factor out match-init and action-copy to avoid a
 * "Wframe-larger-than=1024" warning.  Because the mask is only used to get
 * the actions, it lives in this separate function to save stack space in
 * the caller.
 *
 * If neither the key nor the actions attribute is present, 0 is returned
 * directly; in that case the caller does not use the match either.  If the
 * actions attribute is present, the key attribute must be present too
 * (-EINVAL otherwise), and the actions are copied and stored in *acts.
 *
 * Before returning, on success and on failure alike, match->mask is reset
 * to NULL so the caller never sees a match that points at this function's
 * on-stack mask.
 *
 * Returns 0 on success or a negative errno.
 */
static int ovs_nla_init_match_and_action(struct net *net,
					 struct sw_flow_match *match,
					 struct sw_flow_key *key,
					 struct nlattr **a,
					 struct sw_flow_actions **acts,
					 bool log)
{
	struct sw_flow_mask mask;
	int error = 0;

	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(match, key, true, &mask);
		error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
					  a[OVS_FLOW_ATTR_MASK], log);
		if (error)
			goto error;
	}

	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		if (!a[OVS_FLOW_ATTR_KEY]) {
			OVS_NLERR(log,
				  "Flow key attribute not present in set flow.");
			error = -EINVAL;
			goto error;
		}

		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
					 &mask, log);
		if (IS_ERR(*acts)) {
			error = PTR_ERR(*acts);
			goto error;
		}
	}

	/* On success, error is 0. */
error:
	match->mask = NULL;
	return error;
}

1120 1121
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
J
Joe Stringer 已提交
1122
	struct net *net = sock_net(skb->sk);
1123 1124
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
1125
	struct sw_flow_key key;
1126 1127 1128
	struct sw_flow *flow;
	struct sk_buff *reply = NULL;
	struct datapath *dp;
1129
	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
1130
	struct sw_flow_match match;
1131 1132
	struct sw_flow_id sfid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1133
	int error = 0;
1134
	bool log = !a[OVS_FLOW_ATTR_PROBE];
1135
	bool ufid_present;
1136

1137
	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
1138
	if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
1139 1140
		OVS_NLERR(log,
			  "Flow set message rejected, Key attribute missing.");
1141
		return -EINVAL;
1142
	}
1143 1144 1145

	error = ovs_nla_init_match_and_action(net, &match, &key, a,
					      &acts, log);
1146 1147 1148
	if (error)
		goto error;

1149
	if (acts) {
1150
		/* Can allocate before locking if have acts. */
1151 1152
		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
						ufid_flags);
1153 1154 1155
		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_kfree_acts;
1156
		}
1157
	}
1158

1159
	ovs_lock();
J
Joe Stringer 已提交
1160
	dp = get_dp(net, ovs_header->dp_ifindex);
1161 1162
	if (unlikely(!dp)) {
		error = -ENODEV;
1163
		goto err_unlock_ovs;
1164
	}
1165
	/* Check that the flow exists. */
1166 1167 1168 1169
	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1170 1171
	if (unlikely(!flow)) {
		error = -ENOENT;
1172
		goto err_unlock_ovs;
1173
	}
1174

1175
	/* Update actions, if present. */
1176
	if (likely(acts)) {
1177 1178
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);
1179 1180 1181 1182 1183 1184

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
1185 1186
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
1187 1188 1189 1190 1191
			BUG_ON(error < 0);
		}
	} else {
		/* Could not alloc without acts before locking. */
		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
1192 1193 1194
						info, OVS_FLOW_CMD_NEW, false,
						ufid_flags);

1195
		if (IS_ERR(reply)) {
1196 1197 1198
			error = PTR_ERR(reply);
			goto err_unlock_ovs;
		}
1199
	}
1200 1201 1202 1203

	/* Clear stats. */
	if (a[OVS_FLOW_ATTR_CLEAR])
		ovs_flow_stats_clear(flow);
1204
	ovs_unlock();
1205

1206 1207 1208
	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
	if (old_acts)
1209
		ovs_nla_free_flow_actions_rcu(old_acts);
1210

1211 1212
	return 0;

1213 1214
err_unlock_ovs:
	ovs_unlock();
1215 1216
	kfree_skb(reply);
err_kfree_acts:
1217
	ovs_nla_free_flow_actions(acts);
1218 1219 1220 1221 1222 1223 1224 1225
error:
	return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
1226
	struct net *net = sock_net(skb->sk);
1227 1228 1229 1230
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
1231
	struct sw_flow_match match;
1232 1233 1234
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err = 0;
1235
	bool log = !a[OVS_FLOW_ATTR_PROBE];
1236
	bool ufid_present;
1237

1238 1239
	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
1240
		ovs_match_init(&match, &key, true, NULL);
1241
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
1242 1243
					log);
	} else if (!ufid_present) {
1244 1245
		OVS_NLERR(log,
			  "Flow get message rejected, Key attribute missing.");
1246
		err = -EINVAL;
1247
	}
1248 1249 1250
	if (err)
		return err;

1251
	ovs_lock();
1252
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1253 1254 1255 1256
	if (!dp) {
		err = -ENODEV;
		goto unlock;
	}
1257

1258 1259 1260 1261
	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1262
	if (!flow) {
1263 1264 1265
		err = -ENOENT;
		goto unlock;
	}
1266

1267
	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
1268
					OVS_FLOW_CMD_NEW, true, ufid_flags);
1269 1270 1271 1272
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}
1273

1274
	ovs_unlock();
1275
	return genlmsg_reply(reply, info);
1276 1277 1278
unlock:
	ovs_unlock();
	return err;
1279 1280 1281 1282 1283 1284
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
1285
	struct net *net = sock_net(skb->sk);
1286 1287
	struct sw_flow_key key;
	struct sk_buff *reply;
1288
	struct sw_flow *flow = NULL;
1289
	struct datapath *dp;
1290
	struct sw_flow_match match;
1291 1292
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1293
	int err;
1294
	bool log = !a[OVS_FLOW_ATTR_PROBE];
1295
	bool ufid_present;
1296

1297 1298
	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
1299
		ovs_match_init(&match, &key, true, NULL);
1300 1301
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
					NULL, log);
1302 1303 1304 1305
		if (unlikely(err))
			return err;
	}

1306
	ovs_lock();
1307
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1308
	if (unlikely(!dp)) {
1309 1310 1311
		err = -ENODEV;
		goto unlock;
	}
1312

1313
	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
1314
		err = ovs_flow_tbl_flush(&dp->table);
1315 1316
		goto unlock;
	}
1317

1318 1319 1320 1321
	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1322
	if (unlikely(!flow)) {
1323 1324 1325
		err = -ENOENT;
		goto unlock;
	}
1326

1327
	ovs_flow_tbl_remove(&dp->table, flow);
1328
	ovs_unlock();
1329

1330
	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
1331
					&flow->id, info, false, ufid_flags);
1332 1333 1334 1335 1336 1337
	if (likely(reply)) {
		if (likely(!IS_ERR(reply))) {
			rcu_read_lock();	/*To keep RCU checker happy. */
			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
						     reply, info->snd_portid,
						     info->snd_seq, 0,
1338 1339
						     OVS_FLOW_CMD_DEL,
						     ufid_flags);
1340 1341 1342 1343 1344 1345 1346
			rcu_read_unlock();
			BUG_ON(err < 0);

			ovs_notify(&dp_flow_genl_family, reply, info);
		} else {
			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
		}
1347
	}
1348

1349
	ovs_flow_free(flow, true);
1350
	return 0;
1351 1352 1353
unlock:
	ovs_unlock();
	return err;
1354 1355 1356 1357
}

static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
1358
	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1359
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1360
	struct table_instance *ti;
1361
	struct datapath *dp;
1362 1363 1364 1365
	u32 ufid_flags;
	int err;

	err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
1366
			    OVS_FLOW_ATTR_MAX, flow_policy, NULL);
1367 1368 1369
	if (err)
		return err;
	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1370

1371
	rcu_read_lock();
1372
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1373
	if (!dp) {
1374
		rcu_read_unlock();
1375
		return -ENODEV;
1376
	}
1377

1378
	ti = rcu_dereference(dp->table.ti);
1379 1380 1381 1382 1383 1384
	for (;;) {
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
1385
		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1386 1387 1388
		if (!flow)
			break;

1389
		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1390
					   NETLINK_CB(cb->skb).portid,
1391
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1392
					   OVS_FLOW_CMD_NEW, ufid_flags) < 0)
1393 1394 1395 1396 1397
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
1398
	rcu_read_unlock();
1399 1400 1401
	return skb->len;
}

1402 1403
static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1404
	[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1405 1406
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1407
	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1408 1409
	[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
1410 1411
};

1412
static const struct genl_ops dp_flow_genl_ops[] = {
1413
	{ .cmd = OVS_FLOW_CMD_NEW,
1414
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1415
	  .policy = flow_policy,
1416
	  .doit = ovs_flow_cmd_new
1417 1418
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
1419
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
1430
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1431
	  .policy = flow_policy,
1432
	  .doit = ovs_flow_cmd_set,
1433 1434 1435
	},
};

1436
/* Generic netlink family description for flow commands. */
static struct genl_family dp_flow_genl_family __ro_after_init = {
	.name		= OVS_FLOW_FAMILY,
	.version	= OVS_FLOW_VERSION,
	.hdrsize	= sizeof(struct ovs_header),
	.maxattr	= OVS_FLOW_ATTR_MAX,
	.netnsok	= true,
	.parallel_ops	= true,
	.ops		= dp_flow_genl_ops,
	.n_ops		= ARRAY_SIZE(dp_flow_genl_ops),
	.mcgrps		= &ovs_dp_flow_multicast_group,
	.n_mcgrps	= 1,
	.module		= THIS_MODULE,
};

1450 1451 1452 1453 1454
static size_t ovs_dp_cmd_msg_size(void)
{
	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

	msgsize += nla_total_size(IFNAMSIZ);
1455 1456
	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1457
	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1458 1459 1460 1461

	return msgsize;
}

1462
/* Called with ovs_mutex. */
1463
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1464
				u32 portid, u32 seq, u32 flags, u8 cmd)
1465 1466 1467
{
	struct ovs_header *ovs_header;
	struct ovs_dp_stats dp_stats;
1468
	struct ovs_dp_megaflow_stats dp_megaflow_stats;
1469 1470
	int err;

1471
	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1472 1473 1474 1475 1476 1477 1478 1479 1480 1481
				   flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = get_dpifindex(dp);

	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
	if (err)
		goto nla_put_failure;

1482
	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1483 1484
	if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
			  &dp_stats, OVS_DP_ATTR_PAD))
1485 1486
		goto nla_put_failure;

1487 1488 1489
	if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
			  sizeof(struct ovs_dp_megaflow_stats),
			  &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1490
		goto nla_put_failure;
1491

1492 1493 1494
	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
		goto nla_put_failure;

1495 1496
	genlmsg_end(skb, ovs_header);
	return 0;
1497 1498 1499 1500 1501 1502 1503

nla_put_failure:
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}

1504
static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1505
{
1506
	return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1507 1508
}

J
Jarno Rajahalme 已提交
1509
/* Called with rcu_read_lock or ovs_mutex. */
1510
static struct datapath *lookup_datapath(struct net *net,
1511
					const struct ovs_header *ovs_header,
1512 1513 1514 1515 1516
					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[OVS_DP_ATTR_NAME])
1517
		dp = get_dp(net, ovs_header->dp_ifindex);
1518 1519 1520
	else {
		struct vport *vport;

1521
		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1522 1523 1524 1525 1526
		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}

1527 1528 1529 1530 1531
/* Clears the user_features of the datapath the request refers to, warning
 * if any were set.  Does nothing when the datapath cannot be found.
 */
static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct datapath *dp = lookup_datapath(net, info->userhdr, info->attrs);

	if (!IS_ERR(dp)) {
		WARN(dp->user_features, "Dropping previously announced user features\n");
		dp->user_features = 0;
	}
}

1539
static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1540 1541 1542 1543 1544
{
	if (a[OVS_DP_ATTR_USER_FEATURES])
		dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
}

1545 1546 1547 1548 1549 1550 1551
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
1552
	struct ovs_net *ovs_net;
1553
	int err, i;
1554 1555 1556 1557 1558

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
		goto err;

1559
	reply = ovs_dp_cmd_alloc_info();
1560 1561
	if (!reply)
		return -ENOMEM;
1562 1563 1564 1565

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
1566
		goto err_free_reply;
1567

1568
	ovs_dp_set_net(dp, sock_net(skb->sk));
1569 1570

	/* Allocate table. */
1571 1572
	err = ovs_flow_tbl_init(&dp->table);
	if (err)
1573 1574
		goto err_free_dp;

1575
	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1576 1577 1578 1579 1580
	if (!dp->stats_percpu) {
		err = -ENOMEM;
		goto err_destroy_table;
	}

1581
	dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1582
			    GFP_KERNEL);
1583 1584 1585 1586 1587 1588 1589 1590
	if (!dp->ports) {
		err = -ENOMEM;
		goto err_destroy_percpu;
	}

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
		INIT_HLIST_HEAD(&dp->ports[i]);

A
Andy Zhou 已提交
1591 1592 1593 1594
	err = ovs_meters_init(dp);
	if (err)
		goto err_destroy_ports_array;

1595 1596 1597 1598 1599 1600
	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
1601
	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1602

1603 1604
	ovs_dp_change(dp, a);

1605 1606 1607
	/* So far only local changes have been made, now need the lock. */
	ovs_lock();

1608 1609 1610 1611 1612 1613
	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

1614 1615 1616 1617 1618 1619 1620 1621 1622
		if (err == -EEXIST) {
			/* An outdated user space instance that does not understand
			 * the concept of user_features has attempted to create a new
			 * datapath and is likely to reuse it. Drop all user features.
			 */
			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
				ovs_dp_reset_user_features(skb, info);
		}

A
Andy Zhou 已提交
1623
		goto err_destroy_meters;
1624 1625
	}

1626 1627 1628
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);
1629

1630
	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1631
	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1632 1633

	ovs_unlock();
1634

1635
	ovs_notify(&dp_datapath_genl_family, reply, info);
1636 1637
	return 0;

A
Andy Zhou 已提交
1638
err_destroy_meters:
1639
	ovs_unlock();
A
Andy Zhou 已提交
1640 1641
	ovs_meters_exit(dp);
err_destroy_ports_array:
1642
	kfree(dp->ports);
1643 1644 1645
err_destroy_percpu:
	free_percpu(dp->stats_percpu);
err_destroy_table:
1646
	ovs_flow_tbl_destroy(&dp->table);
1647 1648
err_free_dp:
	kfree(dp);
1649 1650
err_free_reply:
	kfree_skb(reply);
1651 1652 1653 1654
err:
	return err;
}

1655
/* Called with ovs_mutex. */
1656
static void __dp_destroy(struct datapath *dp)
1657
{
1658
	int i;
1659

1660 1661
	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;
1662
		struct hlist_node *n;
1663

1664
		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1665 1666 1667
			if (vport->port_no != OVSP_LOCAL)
				ovs_dp_detach_port(vport);
	}
1668

1669
	list_del_rcu(&dp->list_node);
1670

1671
	/* OVSP_LOCAL is datapath internal port. We need to make sure that
1672
	 * all ports in datapath are destroyed first before freeing datapath.
1673
	 */
1674
	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1675

1676
	/* RCU destroy the flow table */
1677
	call_rcu(&dp->rcu, destroy_dp_rcu);
1678 1679 1680 1681 1682 1683 1684 1685
}

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

1686
	reply = ovs_dp_cmd_alloc_info();
1687 1688 1689
	if (!reply)
		return -ENOMEM;

1690
	ovs_lock();
1691 1692 1693
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
1694
		goto err_unlock_free;
1695

1696 1697 1698
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_DEL);
	BUG_ON(err < 0);
1699 1700

	__dp_destroy(dp);
1701
	ovs_unlock();
1702

1703
	ovs_notify(&dp_datapath_genl_family, reply, info);
1704 1705

	return 0;
1706 1707

err_unlock_free:
1708
	ovs_unlock();
1709
	kfree_skb(reply);
1710
	return err;
1711 1712 1713 1714 1715 1716 1717 1718
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

1719
	reply = ovs_dp_cmd_alloc_info();
1720 1721 1722
	if (!reply)
		return -ENOMEM;

1723
	ovs_lock();
1724
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1725
	err = PTR_ERR(dp);
1726
	if (IS_ERR(dp))
1727
		goto err_unlock_free;
1728

1729 1730
	ovs_dp_change(dp, info->attrs);

1731 1732 1733
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);
1734

1735
	ovs_unlock();
1736
	ovs_notify(&dp_datapath_genl_family, reply, info);
1737 1738

	return 0;
1739 1740

err_unlock_free:
1741
	ovs_unlock();
1742
	kfree_skb(reply);
1743
	return err;
1744 1745 1746 1747 1748 1749
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
1750
	int err;
1751

1752
	reply = ovs_dp_cmd_alloc_info();
1753 1754 1755
	if (!reply)
		return -ENOMEM;

1756
	ovs_lock();
1757
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1758 1759
	if (IS_ERR(dp)) {
		err = PTR_ERR(dp);
1760
		goto err_unlock_free;
1761
	}
1762 1763 1764
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);
1765
	ovs_unlock();
1766 1767

	return genlmsg_reply(reply, info);
1768

1769
err_unlock_free:
1770
	ovs_unlock();
1771
	kfree_skb(reply);
1772
	return err;
1773 1774 1775 1776
}

static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
1777
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1778 1779 1780 1781
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

1782 1783
	ovs_lock();
	list_for_each_entry(dp, &ovs_net->dps, list_node) {
1784
		if (i >= skip &&
1785
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1786 1787 1788 1789 1790
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 OVS_DP_CMD_NEW) < 0)
			break;
		i++;
	}
1791
	ovs_unlock();
1792 1793 1794 1795 1796 1797

	cb->args[0] = i;

	return skb->len;
}

1798 1799 1800 1801 1802 1803
/* Netlink attribute policy shared by every OVS_DP_CMD_* operation. */
static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
	[OVS_DP_ATTR_NAME]		= { .type = NLA_NUL_STRING,
					    .len = IFNAMSIZ - 1 },
	[OVS_DP_ATTR_UPCALL_PID]	= { .type = NLA_U32 },
	[OVS_DP_ATTR_USER_FEATURES]	= { .type = NLA_U32 },
};

1804
static const struct genl_ops dp_datapath_genl_ops[] = {
1805
	{ .cmd = OVS_DP_CMD_NEW,
1806
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1807 1808 1809 1810
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
1811
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1812 1813 1814 1815 1816 1817 1818 1819 1820 1821
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
1822
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1823 1824 1825 1826 1827
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_set,
	},
};

1828
/* Generic netlink family description for datapath commands. */
static struct genl_family dp_datapath_genl_family __ro_after_init = {
	.name		= OVS_DATAPATH_FAMILY,
	.version	= OVS_DATAPATH_VERSION,
	.hdrsize	= sizeof(struct ovs_header),
	.maxattr	= OVS_DP_ATTR_MAX,
	.netnsok	= true,
	.parallel_ops	= true,
	.ops		= dp_datapath_genl_ops,
	.n_ops		= ARRAY_SIZE(dp_datapath_genl_ops),
	.mcgrps		= &ovs_dp_datapath_multicast_group,
	.n_mcgrps	= 1,
	.module		= THIS_MODULE,
};

1842
/* Called with ovs_mutex or RCU read lock. */
1843
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1844 1845
				   struct net *net, u32 portid, u32 seq,
				   u32 flags, u8 cmd)
1846 1847 1848 1849 1850
{
	struct ovs_header *ovs_header;
	struct ovs_vport_stats vport_stats;
	int err;

1851
	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1852 1853 1854 1855 1856 1857
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(vport->dp);

1858 1859
	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1860
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
1861 1862
			   ovs_vport_name(vport)) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
1863
		goto nla_put_failure;
1864

1865 1866 1867 1868 1869 1870 1871
	if (!net_eq(net, dev_net(vport->dev))) {
		int id = peernet2id_alloc(net, dev_net(vport->dev));

		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
			goto nla_put_failure;
	}

1872
	ovs_vport_get_stats(vport, &vport_stats);
1873 1874 1875
	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
			  sizeof(struct ovs_vport_stats), &vport_stats,
			  OVS_VPORT_ATTR_PAD))
1876
		goto nla_put_failure;
1877

1878 1879 1880
	if (ovs_vport_get_upcall_portids(vport, skb))
		goto nla_put_failure;

1881 1882 1883 1884
	err = ovs_vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

1885 1886
	genlmsg_end(skb, ovs_header);
	return 0;
1887 1888 1889 1890 1891 1892 1893 1894

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

1895 1896 1897 1898 1899 1900
/* Allocates a default-sized netlink skb for a vport reply.
 * Returns whatever nlmsg_new() returns.
 */
static struct sk_buff *ovs_vport_cmd_alloc_info(void)
{
	struct sk_buff *reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);

	return reply;
}

/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1901 1902
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
					 u32 portid, u32 seq, u8 cmd)
1903 1904 1905 1906 1907 1908 1909 1910
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!skb)
		return ERR_PTR(-ENOMEM);

1911
	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
1912 1913
	BUG_ON(retval < 0);

1914 1915 1916
	return skb;
}

1917
/* Called with ovs_mutex or RCU read lock. */
1918
static struct vport *lookup_vport(struct net *net,
1919
				  const struct ovs_header *ovs_header,
1920 1921 1922 1923 1924
				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

1925 1926
	if (a[OVS_VPORT_ATTR_IFINDEX])
		return ERR_PTR(-EOPNOTSUPP);
1927
	if (a[OVS_VPORT_ATTR_NAME]) {
1928
		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1929 1930
		if (!vport)
			return ERR_PTR(-ENODEV);
1931 1932 1933
		if (ovs_header->dp_ifindex &&
		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
			return ERR_PTR(-ENODEV);
1934 1935 1936 1937 1938 1939 1940
		return vport;
	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EFBIG);

1941
		dp = get_dp(net, ovs_header->dp_ifindex);
1942 1943 1944
		if (!dp)
			return ERR_PTR(-ENODEV);

1945
		vport = ovs_vport_ovsl_rcu(dp, port_no);
1946
		if (!vport)
1947
			return ERR_PTR(-ENODEV);
1948 1949 1950
		return vport;
	} else
		return ERR_PTR(-EINVAL);
1951

1952 1953
}

1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976
/* Called with ovs_mutex.
 *
 * Recomputes dp->max_headroom as the largest forwarding headroom of any
 * port in the datapath, then sets that value as the rx headroom of every
 * port.
 */
static void update_headroom(struct datapath *dp)
{
	unsigned int widest = 0;
	struct vport *vport;
	int bkt;

	/* Pass 1: find the maximum over all ports. */
	for (bkt = 0; bkt < DP_VPORT_HASH_BUCKETS; bkt++) {
		hlist_for_each_entry_rcu(vport, &dp->ports[bkt], dp_hash_node) {
			unsigned int need = netdev_get_fwd_headroom(vport->dev);

			if (need > widest)
				widest = need;
		}
	}

	dp->max_headroom = widest;

	/* Pass 2: push the maximum down to every port. */
	for (bkt = 0; bkt < DP_VPORT_HASH_BUCKETS; bkt++) {
		hlist_for_each_entry_rcu(vport, &dp->ports[bkt], dp_hash_node)
			netdev_set_rx_headroom(vport->dev, widest);
	}
}

1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	u32 port_no;
	int err;

	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
1990
		return -EINVAL;
1991 1992
	if (a[OVS_VPORT_ATTR_IFINDEX])
		return -EOPNOTSUPP;
1993 1994 1995 1996 1997 1998 1999 2000 2001

	port_no = a[OVS_VPORT_ATTR_PORT_NO]
		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
	if (port_no >= DP_MAX_PORTS)
		return -EFBIG;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;
2002

2003
	ovs_lock();
2004
restart:
2005
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2006 2007
	err = -ENODEV;
	if (!dp)
2008
		goto exit_unlock_free;
2009

2010
	if (port_no) {
2011
		vport = ovs_vport_ovsl(dp, port_no);
2012 2013
		err = -EBUSY;
		if (vport)
2014
			goto exit_unlock_free;
2015 2016 2017 2018
	} else {
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
2019
				goto exit_unlock_free;
2020
			}
2021
			vport = ovs_vport_ovsl(dp, port_no);
2022 2023 2024 2025 2026 2027 2028 2029 2030 2031
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
2032
	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2033 2034 2035

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
2036 2037 2038
	if (IS_ERR(vport)) {
		if (err == -EAGAIN)
			goto restart;
2039
		goto exit_unlock_free;
2040
	}
2041

2042 2043 2044
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_NEW);
2045 2046 2047 2048 2049 2050

	if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
		update_headroom(dp);
	else
		netdev_set_rx_headroom(vport->dev, dp->max_headroom);

2051 2052
	BUG_ON(err < 0);
	ovs_unlock();
2053

2054
	ovs_notify(&dp_vport_genl_family, reply, info);
2055
	return 0;
2056

2057
exit_unlock_free:
2058
	ovs_unlock();
2059
	kfree_skb(reply);
2060 2061 2062 2063 2064 2065 2066 2067 2068 2069
	return err;
}

static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

2070 2071 2072 2073
	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

2074
	ovs_lock();
2075
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2076 2077
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
2078
		goto exit_unlock_free;
2079 2080

	if (a[OVS_VPORT_ATTR_TYPE] &&
2081
	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2082
		err = -EINVAL;
2083
		goto exit_unlock_free;
2084 2085
	}

2086
	if (a[OVS_VPORT_ATTR_OPTIONS]) {
2087
		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2088
		if (err)
2089
			goto exit_unlock_free;
2090
	}
2091

2092 2093 2094 2095 2096 2097 2098 2099

	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];

		err = ovs_vport_set_upcall_portids(vport, ids);
		if (err)
			goto exit_unlock_free;
	}
2100

2101 2102 2103
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_NEW);
2104
	BUG_ON(err < 0);
2105

2106
	ovs_unlock();
2107
	ovs_notify(&dp_vport_genl_family, reply, info);
2108
	return 0;
2109

2110
exit_unlock_free:
2111
	ovs_unlock();
2112
	kfree_skb(reply);
2113 2114 2115 2116 2117
	return err;
}

static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
2118
	bool must_update_headroom = false;
2119 2120
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
2121
	struct datapath *dp;
2122 2123 2124
	struct vport *vport;
	int err;

2125 2126 2127 2128
	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

2129
	ovs_lock();
2130
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2131 2132
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
2133
		goto exit_unlock_free;
2134 2135 2136

	if (vport->port_no == OVSP_LOCAL) {
		err = -EINVAL;
2137
		goto exit_unlock_free;
2138 2139
	}

2140 2141 2142
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_DEL);
2143
	BUG_ON(err < 0);
2144 2145 2146 2147 2148 2149

	/* the vport deletion may trigger dp headroom update */
	dp = vport->dp;
	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
		must_update_headroom = true;
	netdev_reset_rx_headroom(vport->dev);
2150
	ovs_dp_detach_port(vport);
2151 2152 2153

	if (must_update_headroom)
		update_headroom(dp);
2154
	ovs_unlock();
2155

2156
	ovs_notify(&dp_vport_genl_family, reply, info);
2157
	return 0;
2158

2159
exit_unlock_free:
2160
	ovs_unlock();
2161
	kfree_skb(reply);
2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172
	return err;
}

static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

2173 2174 2175 2176
	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

2177
	rcu_read_lock();
2178
	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2179 2180
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
2181
		goto exit_unlock_free;
2182 2183 2184
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_NEW);
2185
	BUG_ON(err < 0);
2186 2187 2188 2189
	rcu_read_unlock();

	return genlmsg_reply(reply, info);

2190
exit_unlock_free:
2191
	rcu_read_unlock();
2192
	kfree_skb(reply);
2193 2194 2195 2196 2197 2198 2199
	return err;
}

static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
2200 2201
	int bucket = cb->args[0], skip = cb->args[1];
	int i, j = 0;
2202

J
Jarno Rajahalme 已提交
2203
	rcu_read_lock();
2204
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
J
Jarno Rajahalme 已提交
2205 2206
	if (!dp) {
		rcu_read_unlock();
2207
		return -ENODEV;
J
Jarno Rajahalme 已提交
2208
	}
2209
	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2210
		struct vport *vport;
2211 2212

		j = 0;
2213
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2214 2215
			if (j >= skip &&
			    ovs_vport_cmd_fill_info(vport, skb,
2216
						    sock_net(skb->sk),
2217
						    NETLINK_CB(cb->skb).portid,
2218 2219 2220 2221 2222 2223 2224 2225
						    cb->nlh->nlmsg_seq,
						    NLM_F_MULTI,
						    OVS_VPORT_CMD_NEW) < 0)
				goto out;

			j++;
		}
		skip = 0;
2226
	}
2227
out:
2228 2229
	rcu_read_unlock();

2230 2231
	cb->args[0] = i;
	cb->args[1] = j;
2232

2233
	return skb->len;
2234 2235
}

2236 2237 2238 2239 2240 2241 2242
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2243 2244
	[OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
2245 2246
};

2247
static const struct genl_ops dp_vport_genl_ops[] = {
2248
	{ .cmd = OVS_VPORT_CMD_NEW,
2249
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2250 2251 2252 2253
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_new
	},
	{ .cmd = OVS_VPORT_CMD_DEL,
2254
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2255 2256 2257 2258 2259 2260 2261 2262 2263 2264
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_del
	},
	{ .cmd = OVS_VPORT_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
2265
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2266 2267 2268 2269 2270
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_set,
	},
};

2271
/*
 * Generic netlink family for vport commands.  Every message carries a
 * struct ovs_header as its family-specific header.
 */
struct genl_family dp_vport_genl_family __ro_after_init = {
	/* Identification. */
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.module = THIS_MODULE,

	/* Message layout. */
	.hdrsize = sizeof(struct ovs_header),
	.maxattr = OVS_VPORT_ATTR_MAX,

	/* Behaviour flags. */
	.netnsok = true,
	.parallel_ops = true,

	/* Commands and the single multicast group. */
	.ops = dp_vport_genl_ops,
	.n_ops = ARRAY_SIZE(dp_vport_genl_ops),
	.mcgrps = &ovs_dp_vport_multicast_group,
	.n_mcgrps = 1,
};

2285 2286 2287 2288 2289
static struct genl_family * const dp_genl_families[] = {
	&dp_datapath_genl_family,
	&dp_vport_genl_family,
	&dp_flow_genl_family,
	&dp_packet_genl_family,
A
Andy Zhou 已提交
2290
	&dp_meter_genl_family,
2291 2292 2293 2294 2295 2296 2297
};

static void dp_unregister_genl(int n_families)
{
	int i;

	for (i = 0; i < n_families; i++)
2298
		genl_unregister_family(dp_genl_families[i]);
2299 2300
}

2301
/*
 * Register every family listed in dp_genl_families.
 *
 * If one registration fails, the families registered before it are
 * unregistered again and the error from genl_register_family() is
 * returned.  Returns 0 when all families are registered.
 */
static int __init dp_register_genl(void)
{
	int n;

	for (n = 0; n < ARRAY_SIZE(dp_genl_families); n++) {
		int rc = genl_register_family(dp_genl_families[n]);

		if (rc) {
			/* n families were registered before this one. */
			dp_unregister_genl(n);
			return rc;
		}
	}

	return 0;
}

2320 2321 2322 2323 2324
/*
 * Per-namespace init hook: set up the empty datapath list and the
 * notification work item of @net, then initialise conntrack support for
 * the namespace.  Always returns 0.
 */
static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *priv = net_generic(net, ovs_net_id);

	INIT_LIST_HEAD(&priv->dps);
	INIT_WORK(&priv->dp_notify_work, ovs_dp_notify_wq);

	ovs_ct_init(net);

	return 0;
}

P
Pravin B Shelar 已提交
2330 2331
/*
 * Collect onto @head every internal vport that belongs to a datapath of
 * @net but whose network device lives in namespace @dnet.  Vports are
 * linked through their detach_list member.
 */
static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
					    struct list_head *head)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	struct datapath *dp;

	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		int bucket;

		for (bucket = 0; bucket < DP_VPORT_HASH_BUCKETS; bucket++) {
			struct vport *port;

			hlist_for_each_entry(port, &dp->ports[bucket],
					     dp_hash_node) {
				/* Only internal devices are of interest. */
				if (port->ops->type == OVS_VPORT_TYPE_INTERNAL &&
				    dev_net(port->dev) == dnet)
					list_add(&port->detach_list, head);
			}
		}
	}
}

/*
 * Per-namespace exit hook for namespace @dnet.
 *
 * Destroys every datapath of @dnet, then detaches internal vports that
 * belong to datapaths of other namespaces but whose device lives in
 * @dnet, and finally waits for the namespace's notification work to
 * finish.
 */
static void __net_exit ovs_exit_net(struct net *dnet)
{
	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
	struct datapath *dp, *next_dp;
	struct vport *port, *next_port;
	struct net *other;
	LIST_HEAD(to_detach);

	ovs_ct_exit(dnet);

	ovs_lock();

	list_for_each_entry_safe(dp, next_dp, &ovs_net->dps, list_node)
		__dp_destroy(dp);

	/* Walk all namespaces with rtnl and net_rwsem held and gather the
	 * vports that have to be detached.
	 */
	rtnl_lock();
	down_read(&net_rwsem);
	for_each_net(other)
		list_vports_from_net(other, dnet, &to_detach);
	up_read(&net_rwsem);
	rtnl_unlock();

	/* Detach all vports from given namespace. */
	list_for_each_entry_safe(port, next_port, &to_detach, detach_list) {
		list_del(&port->detach_list);
		ovs_dp_detach_port(port);
	}

	ovs_unlock();

	cancel_work_sync(&ovs_net->dp_notify_work);
}

/*
 * Per-network-namespace hooks; each namespace gets a struct ovs_net that
 * is looked up through ovs_net_id.
 */
static struct pernet_operations ovs_net_ops = {
	.id   = &ovs_net_id,
	.size = sizeof(struct ovs_net),
	.init = ovs_init_net,
	.exit = ovs_exit_net,
};

2391 2392 2393 2394
/*
 * Module entry point.
 *
 * Brings up the sub-systems of the datapath one after another.  When a
 * step fails, everything that was set up before it is torn down again in
 * reverse order and the error of the failing step is returned.
 */
static int __init dp_init(void)
{
	int rc;

	/* The per-packet control block must fit into skb->cb. */
	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath\n");

	rc = action_fifos_init();
	if (rc)
		return rc;

	rc = ovs_internal_dev_rtnl_link_register();
	if (rc)
		goto err_fifos;

	rc = ovs_flow_init();
	if (rc)
		goto err_rtnl_link;

	rc = ovs_vport_init();
	if (rc)
		goto err_flow;

	rc = register_pernet_device(&ovs_net_ops);
	if (rc)
		goto err_vport;

	rc = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (rc)
		goto err_pernet;

	rc = ovs_netdev_init();
	if (rc)
		goto err_notifier;

	rc = dp_register_genl();
	if (rc < 0)
		goto err_netdev;

	return 0;

	/* Unwind in the reverse order of the set-up above. */
err_netdev:
	ovs_netdev_exit();
err_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
err_pernet:
	unregister_pernet_device(&ovs_net_ops);
err_vport:
	ovs_vport_exit();
err_flow:
	ovs_flow_exit();
err_rtnl_link:
	ovs_internal_dev_rtnl_link_unregister();
err_fifos:
	action_fifos_exit();
	return rc;
}

static void dp_cleanup(void)
{
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2454
	ovs_netdev_exit();
2455
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
2456 2457
	unregister_pernet_device(&ovs_net_ops);
	rcu_barrier();
2458 2459
	ovs_vport_exit();
	ovs_flow_exit();
2460
	ovs_internal_dev_rtnl_link_unregister();
2461
	action_fifos_exit();
2462 2463 2464 2465 2466 2467 2468
}

/* Module entry and exit points. */
module_init(dp_init);
module_exit(dp_cleanup);

/* Module metadata. */
MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");

/* One module alias per generic netlink family provided by this module. */
MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);