// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Generic netlink support functions to configure an SMC-R PNET table
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
 */

#include <linux/module.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <net/netlink.h>
#include <net/genetlink.h>

#include <uapi/linux/if.h>
#include <uapi/linux/smc.h>

#include <rdma/ib_verbs.h>

#include <net/netns/generic.h>
#include "smc_netns.h"

#include "smc_pnet.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_core.h"

/* ASCII code of the blank character; treated like NUL when comparing pnetids */
#define SMC_ASCII_BLANK 32

/* defined further down in this file, needed earlier by smc_pnet_add_eth() */
static struct net_device *pnet_find_base_ndev(struct net_device *ndev);

35 36 37
static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
	[SMC_PNETID_NAME] = {
		.type = NLA_NUL_STRING,
38
		.len = SMC_MAX_PNETID_LEN
39 40 41 42 43 44 45 46 47 48 49 50 51 52
	},
	[SMC_PNETID_ETHNAME] = {
		.type = NLA_NUL_STRING,
		.len = IFNAMSIZ - 1
	},
	[SMC_PNETID_IBNAME] = {
		.type = NLA_NUL_STRING,
		.len = IB_DEVICE_NAME_MAX - 1
	},
	[SMC_PNETID_IBPORT] = { .type = NLA_U8 }
};

static struct genl_family smc_pnet_nl_family;

/* Kind of device a pnet table entry refers to */
enum smc_pnet_nametype {
	SMC_PNET_ETH	= 1,	/* entry names a net device */
	SMC_PNET_IB	= 2,	/* entry names an ib (or smcd) device */
};

H
Hans Wippel 已提交
58 59 60 61
/* pnet entry stored in pnet table */
struct smc_pnetentry {
	struct list_head list;
	char pnet_name[SMC_MAX_PNETID_LEN + 1];
62 63 64 65 66 67 68 69 70 71 72
	enum smc_pnet_nametype type;
	union {
		struct {
			char eth_name[IFNAMSIZ + 1];
			struct net_device *ndev;
		};
		struct {
			char ib_name[IB_DEVICE_NAME_MAX + 1];
			u8 ib_port;
		};
	};
H
Hans Wippel 已提交
73
};
74

H
Hans Wippel 已提交
75 76
/* Check if two given pnetids match */
static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2)
77
{
H
Hans Wippel 已提交
78
	int i;
79

H
Hans Wippel 已提交
80 81 82
	for (i = 0; i < SMC_MAX_PNETID_LEN; i++) {
		if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) &&
		    (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK))
83
			break;
H
Hans Wippel 已提交
84 85
		if (pnetid1[i] != pnetid2[i])
			return false;
86
	}
H
Hans Wippel 已提交
87
	return true;
88 89 90 91
}

/* Remove a pnetid from the pnet table.
 */
92
static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
93 94
{
	struct smc_pnetentry *pnetelem, *tmp_pe;
95
	struct smc_pnettable *pnettable;
H
Hans Wippel 已提交
96
	struct smc_ib_device *ibdev;
97
	struct smcd_dev *smcd_dev;
98
	struct smc_net *sn;
99
	int rc = -ENOENT;
H
Hans Wippel 已提交
100
	int ibport;
101

102 103 104 105
	/* get pnettable for namespace */
	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;

106
	/* remove table entry */
107 108
	write_lock(&pnettable->lock);
	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
109
				 list) {
H
Hans Wippel 已提交
110 111
		if (!pnet_name ||
		    smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
112
			list_del(&pnetelem->list);
113
			if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) {
114
				dev_put(pnetelem->ndev);
115 116 117 118 119 120
				pr_warn_ratelimited("smc: net device %s "
						    "erased user defined "
						    "pnetid %.16s\n",
						    pnetelem->eth_name,
						    pnetelem->pnet_name);
			}
121 122 123 124
			kfree(pnetelem);
			rc = 0;
		}
	}
125 126 127 128 129 130
	write_unlock(&pnettable->lock);

	/* if this is not the initial namespace, stop here */
	if (net != &init_net)
		return rc;

H
Hans Wippel 已提交
131 132 133 134 135 136 137 138
	/* remove ib devices */
	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
			if (ibdev->pnetid_by_user[ibport] &&
			    (!pnet_name ||
			     smc_pnet_match(pnet_name,
					    ibdev->pnetid[ibport]))) {
139 140 141 142 143 144
				pr_warn_ratelimited("smc: ib device %s ibport "
						    "%d erased user defined "
						    "pnetid %.16s\n",
						    ibdev->ibdev->name,
						    ibport + 1,
						    ibdev->pnetid[ibport]);
H
Hans Wippel 已提交
145 146 147 148 149 150 151 152
				memset(ibdev->pnetid[ibport], 0,
				       SMC_MAX_PNETID_LEN);
				ibdev->pnetid_by_user[ibport] = false;
				rc = 0;
			}
		}
	}
	spin_unlock(&smc_ib_devices.lock);
153 154 155 156 157 158
	/* remove smcd devices */
	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
		if (smcd_dev->pnetid_by_user &&
		    (!pnet_name ||
		     smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
159 160 161 162
			pr_warn_ratelimited("smc: smcd device %s "
					    "erased user defined pnetid "
					    "%.16s\n", dev_name(&smcd_dev->dev),
					    smcd_dev->pnetid);
163 164 165 166 167 168
			memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
			smcd_dev->pnetid_by_user = false;
			rc = 0;
		}
	}
	spin_unlock(&smcd_dev_list.lock);
169 170 171
	return rc;
}

172
/* Add the reference to a given network device to the pnet table.
173
 */
174
static int smc_pnet_add_by_ndev(struct net_device *ndev)
175 176
{
	struct smc_pnetentry *pnetelem, *tmp_pe;
177 178 179
	struct smc_pnettable *pnettable;
	struct net *net = dev_net(ndev);
	struct smc_net *sn;
180 181
	int rc = -ENOENT;

182 183 184 185 186 187
	/* get pnettable for namespace */
	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;

	write_lock(&pnettable->lock);
	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
188 189 190 191
		if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
		    !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
			dev_hold(ndev);
			pnetelem->ndev = ndev;
192
			rc = 0;
193 194 195 196
			pr_warn_ratelimited("smc: adding net device %s with "
					    "user defined pnetid %.16s\n",
					    pnetelem->eth_name,
					    pnetelem->pnet_name);
197 198 199
			break;
		}
	}
200
	write_unlock(&pnettable->lock);
201 202 203
	return rc;
}

204
/* Remove the reference to a given network device from the pnet table.
205
 */
206
static int smc_pnet_remove_by_ndev(struct net_device *ndev)
207
{
208 209 210 211 212
	struct smc_pnetentry *pnetelem, *tmp_pe;
	struct smc_pnettable *pnettable;
	struct net *net = dev_net(ndev);
	struct smc_net *sn;
	int rc = -ENOENT;
H
Hans Wippel 已提交
213

214 215 216
	/* get pnettable for namespace */
	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;
217

218 219 220 221 222 223
	write_lock(&pnettable->lock);
	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
		if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
			dev_put(pnetelem->ndev);
			pnetelem->ndev = NULL;
			rc = 0;
224 225 226 227
			pr_warn_ratelimited("smc: removing net device %s with "
					    "user defined pnetid %.16s\n",
					    pnetelem->eth_name,
					    pnetelem->pnet_name);
228
			break;
229 230
		}
	}
231 232 233
	write_unlock(&pnettable->lock);
	return rc;
}
234

235 236 237 238 239 240 241
/* Apply pnetid to ib device when no pnetid is set.
 */
static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
			      char *pnet_name)
{
	u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
	bool applied = false;
H
Hans Wippel 已提交
242

243 244 245 246 247 248 249 250 251 252
	spin_lock(&smc_ib_devices.lock);
	if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
		memcpy(ib_dev->pnetid[ib_port - 1], pnet_name,
		       SMC_MAX_PNETID_LEN);
		ib_dev->pnetid_by_user[ib_port - 1] = true;
		applied = true;
	}
	spin_unlock(&smc_ib_devices.lock);
	return applied;
}
H
Hans Wippel 已提交
253

254 255 256 257 258 259
/* Apply pnetid to smcd device when no pnetid is set.
 */
static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name)
{
	u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
	bool applied = false;
260

261 262 263 264 265
	spin_lock(&smcd_dev_list.lock);
	if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
		memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN);
		smcd_dev->pnetid_by_user = true;
		applied = true;
H
Hans Wippel 已提交
266
	}
267 268
	spin_unlock(&smcd_dev_list.lock);
	return applied;
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
}

/* The limit for pnetid is 16 characters.
 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
 * Lower case letters are converted to upper case.
 * Interior blanks should not be used.
 */
static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
{
	char *bf = skip_spaces(pnet_name);
	size_t len = strlen(bf);
	char *end = bf + len;

	if (!len)
		return false;
	while (--end >= bf && isspace(*end))
		;
U
Ursula Braun 已提交
286
	if (end - bf >= SMC_MAX_PNETID_LEN)
287 288 289 290 291 292 293 294 295 296 297 298
		return false;
	while (bf <= end) {
		if (!isalnum(*bf))
			return false;
		*pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
		bf++;
	}
	*pnetid = '\0';
	return true;
}

/* Find an infiniband device by a given name. The device might not exist. */
299
static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
300 301 302 303 304 305
{
	struct smc_ib_device *ibdev;

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		if (!strncmp(ibdev->ibdev->name, ib_name,
306 307 308
			     sizeof(ibdev->ibdev->name)) ||
		    !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
			     IB_DEVICE_NAME_MAX - 1)) {
309 310 311 312 313 314 315 316 317
			goto out;
		}
	}
	ibdev = NULL;
out:
	spin_unlock(&smc_ib_devices.lock);
	return ibdev;
}

318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
/* Find an smcd device by a given name. The device might not exist. */
static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
{
	struct smcd_dev *smcd_dev;

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
		if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
			     IB_DEVICE_NAME_MAX - 1))
			goto out;
	}
	smcd_dev = NULL;
out:
	spin_unlock(&smcd_dev_list.lock);
	return smcd_dev;
}

335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
			    char *eth_name, char *pnet_name)
{
	struct smc_pnetentry *tmp_pe, *new_pe;
	struct net_device *ndev, *base_ndev;
	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
	bool new_netdev;
	int rc;

	/* check if (base) netdev already has a pnetid. If there is one, we do
	 * not want to add a pnet table entry
	 */
	rc = -EEXIST;
	ndev = dev_get_by_name(net, eth_name);	/* dev_hold() */
	if (ndev) {
		base_ndev = pnet_find_base_ndev(ndev);
		if (!smc_pnetid_by_dev_port(base_ndev->dev.parent,
					    base_ndev->dev_port, ndev_pnetid))
			goto out_put;
	}

	/* add a new netdev entry to the pnet table if there isn't one */
	rc = -ENOMEM;
	new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
	if (!new_pe)
		goto out_put;
	new_pe->type = SMC_PNET_ETH;
	memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
	strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
	new_pe->ndev = ndev;

	rc = -EEXIST;
	new_netdev = true;
	write_lock(&pnettable->lock);
	list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
		if (tmp_pe->type == SMC_PNET_ETH &&
		    !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
			new_netdev = false;
			break;
		}
	}
	if (new_netdev) {
		list_add_tail(&new_pe->list, &pnettable->pnetlist);
		write_unlock(&pnettable->lock);
	} else {
		write_unlock(&pnettable->lock);
		kfree(new_pe);
		goto out_put;
	}
384 385 386 387
	if (ndev)
		pr_warn_ratelimited("smc: net device %s "
				    "applied user defined pnetid %.16s\n",
				    new_pe->eth_name, new_pe->pnet_name);
388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
	return 0;

out_put:
	if (ndev)
		dev_put(ndev);
	return rc;
}

/* Add an ib device entry with the given pnetid to the pnet table.
 * The pnetid is first applied to a currently known ib and/or smcd device
 * of that name; ib_port is 1-based.
 * Returns 0 on success, -EEXIST if a found device already has a pnetid or
 * the table already has an entry for ib_name, -ENOMEM on allocation
 * failure.
 */
static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
			   u8 ib_port, char *pnet_name)
{
	struct smc_pnetentry *entry, *cur;
	struct smc_ib_device *ib_dev;
	struct smcd_dev *smcd_dev;
	bool smcddev_applied = true;
	bool ibdev_applied = true;
	bool duplicate = false;

	/* try to apply the pnetid to active devices */
	ib_dev = smc_pnet_find_ib(ib_name);
	if (ib_dev) {
		ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name);
		if (ibdev_applied)
			pr_warn_ratelimited("smc: ib device %s ibport %d applied user defined pnetid %.16s\n",
					    ib_dev->ibdev->name,
					    ib_port,
					    ib_dev->pnetid[ib_port - 1]);
	}
	smcd_dev = smc_pnet_find_smcd(ib_name);
	if (smcd_dev) {
		smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name);
		if (smcddev_applied)
			pr_warn_ratelimited("smc: smcd device %s applied user defined pnetid %.16s\n",
					    dev_name(&smcd_dev->dev),
					    smcd_dev->pnetid);
	}
	/* Apply fails when a device has a hardware-defined pnetid set, do not
	 * add a pnet table entry in that case.
	 */
	if (!(ibdev_applied && smcddev_applied))
		return -EEXIST;

	/* prepare the new entry before taking the table lock */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->type = SMC_PNET_IB;
	memcpy(entry->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
	strncpy(entry->ib_name, ib_name, IB_DEVICE_NAME_MAX);
	entry->ib_port = ib_port;

	/* add the entry unless the table already knows this ib device */
	write_lock(&pnettable->lock);
	list_for_each_entry(cur, &pnettable->pnetlist, list) {
		if (cur->type == SMC_PNET_IB &&
		    !strncmp(cur->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
			duplicate = true;
			break;
		}
	}
	if (!duplicate)
		list_add_tail(&entry->list, &pnettable->pnetlist);
	write_unlock(&pnettable->lock);

	if (duplicate) {
		kfree(entry);
		return -EEXIST;
	}
	return 0;
}

/* Append a pnetid to the end of the pnet table if not already on this list.
461
 */
462
static int smc_pnet_enter(struct net *net, struct nlattr *tb[])
463
{
464 465 466 467 468 469 470
	char pnet_name[SMC_MAX_PNETID_LEN + 1];
	struct smc_pnettable *pnettable;
	bool new_netdev = false;
	bool new_ibdev = false;
	struct smc_net *sn;
	u8 ibport = 1;
	char *string;
471
	int rc;
472

473 474 475
	/* get pnettable for namespace */
	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;
476 477 478 479 480

	rc = -EINVAL;
	if (!tb[SMC_PNETID_NAME])
		goto error;
	string = (char *)nla_data(tb[SMC_PNETID_NAME]);
481
	if (!smc_pnetid_valid(string, pnet_name))
482 483
		goto error;

H
Hans Wippel 已提交
484 485
	if (tb[SMC_PNETID_ETHNAME]) {
		string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
486 487 488 489
		rc = smc_pnet_add_eth(pnettable, net, string, pnet_name);
		if (!rc)
			new_netdev = true;
		else if (rc != -EEXIST)
H
Hans Wippel 已提交
490 491
			goto error;
	}
492

493 494
	/* if this is not the initial namespace, stop here */
	if (net != &init_net)
495
		return new_netdev ? 0 : -EEXIST;
496

497
	rc = -EINVAL;
H
Hans Wippel 已提交
498
	if (tb[SMC_PNETID_IBNAME]) {
499 500 501 502 503
		string = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
		string = strim(string);
		if (tb[SMC_PNETID_IBPORT]) {
			ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]);
			if (ibport < 1 || ibport > SMC_MAX_PORTS)
H
Hans Wippel 已提交
504 505
				goto error;
		}
506 507 508 509 510
		rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name);
		if (!rc)
			new_ibdev = true;
		else if (rc != -EEXIST)
			goto error;
H
Hans Wippel 已提交
511
	}
512
	return (new_netdev || new_ibdev) ? 0 : -EEXIST;
513 514 515 516 517 518

error:
	return rc;
}

/* Convert an smc_pnetentry to a netlink attribute sequence */
H
Hans Wippel 已提交
519
static int smc_pnet_set_nla(struct sk_buff *msg,
520
			    struct smc_pnetentry *pnetelem)
521
{
H
Hans Wippel 已提交
522
	if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
523
		return -1;
524
	if (pnetelem->type == SMC_PNET_ETH) {
H
Hans Wippel 已提交
525
		if (nla_put_string(msg, SMC_PNETID_ETHNAME,
526
				   pnetelem->eth_name))
H
Hans Wippel 已提交
527 528 529 530
			return -1;
	} else {
		if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
			return -1;
531
	}
532 533
	if (pnetelem->type == SMC_PNET_IB) {
		if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) ||
H
Hans Wippel 已提交
534 535 536 537 538 539
		    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
			return -1;
	} else {
		if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
		    nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
			return -1;
540 541
	}

H
Hans Wippel 已提交
542
	return 0;
543 544 545 546 547
}

static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = genl_info_net(info);
548

549
	return smc_pnet_enter(net, info->attrs);
550 551 552 553
}

static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
{
554 555
	struct net *net = genl_info_net(info);

556 557
	if (!info->attrs[SMC_PNETID_NAME])
		return -EINVAL;
558
	return smc_pnet_remove_by_pnetid(net,
559 560 561 562 563 564 565 566 567 568 569
				(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
}

/* Start of a dump: cb->args[0] holds the index of the next entry to dump */
static int smc_pnet_dump_start(struct netlink_callback *cb)
{
	cb->args[0] = 0;

	return 0;
}

/* Put one pnet table entry as SMC_PNETID_GET message into skb.
 * Returns 0 on success, -ENOMEM if the message header could not be added,
 * -EMSGSIZE if the attributes could not be added (the message is
 * cancelled in that case).
 */
static int smc_pnet_dumpinfo(struct sk_buff *skb,
			     u32 portid, u32 seq, u32 flags,
			     struct smc_pnetentry *pnetelem)
{
	void *msg_hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family,
				    flags, SMC_PNETID_GET);

	if (!msg_hdr)
		return -ENOMEM;
	if (smc_pnet_set_nla(skb, pnetelem) >= 0) {
		genlmsg_end(skb, msg_hdr);
		return 0;
	}
	genlmsg_cancel(skb, msg_hdr);
	return -EMSGSIZE;
}

586 587
static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
			  u32 seq, u8 *pnetid, int start_idx)
588
{
589
	struct smc_pnettable *pnettable;
590
	struct smc_pnetentry *pnetelem;
591
	struct smc_net *sn;
592 593
	int idx = 0;

594 595 596 597
	/* get pnettable for namespace */
	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;

598
	/* dump pnettable entries */
599 600
	read_lock(&pnettable->lock);
	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
H
Hans Wippel 已提交
601 602 603
		if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
			continue;
		if (idx++ < start_idx)
604
			continue;
605 606 607
		/* if this is not the initial namespace, dump only netdev */
		if (net != &init_net && pnetelem->type != SMC_PNET_ETH)
			continue;
H
Hans Wippel 已提交
608
		if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
609
				      pnetelem)) {
610 611 612 613
			--idx;
			break;
		}
	}
614
	read_unlock(&pnettable->lock);
H
Hans Wippel 已提交
615 616 617 618 619
	return idx;
}

static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
620
	struct net *net = sock_net(skb->sk);
H
Hans Wippel 已提交
621 622
	int idx;

623
	idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid,
H
Hans Wippel 已提交
624 625 626
			     cb->nlh->nlmsg_seq, NULL, cb->args[0]);

	cb->args[0] = idx;
627 628 629
	return skb->len;
}

H
Hans Wippel 已提交
630 631 632
/* Retrieve one PNETID entry */
static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
{
633
	struct net *net = genl_info_net(info);
H
Hans Wippel 已提交
634 635 636 637 638 639 640 641 642 643
	struct sk_buff *msg;
	void *hdr;

	if (!info->attrs[SMC_PNETID_NAME])
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

644
	_smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq,
H
Hans Wippel 已提交
645 646 647 648 649 650 651 652 653 654 655 656
		       nla_data(info->attrs[SMC_PNETID_NAME]), 0);

	/* finish multi part message and send it */
	hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
			NLM_F_MULTI);
	if (!hdr) {
		nlmsg_free(msg);
		return -EMSGSIZE;
	}
	return genlmsg_reply(msg, info);
}

657 658 659 660
/* Remove and delete all pnetids from pnet table.
 */
static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
{
661 662
	struct net *net = genl_info_net(info);

663 664
	smc_pnet_remove_by_pnetid(net, NULL);
	return 0;
665 666 667 668 669 670
}

/* SMC_PNETID generic netlink operation definition */
static const struct genl_ops smc_pnet_ops[] = {
	{
		.cmd = SMC_PNETID_GET,
671
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
672
		/* can be retrieved by unprivileged users */
673 674 675 676 677 678
		.doit = smc_pnet_get,
		.dumpit = smc_pnet_dump,
		.start = smc_pnet_dump_start
	},
	{
		.cmd = SMC_PNETID_ADD,
679
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
680 681 682 683 684
		.flags = GENL_ADMIN_PERM,
		.doit = smc_pnet_add
	},
	{
		.cmd = SMC_PNETID_DEL,
685
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
686 687 688 689 690
		.flags = GENL_ADMIN_PERM,
		.doit = smc_pnet_del
	},
	{
		.cmd = SMC_PNETID_FLUSH,
691
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
692 693 694 695 696 697
		.flags = GENL_ADMIN_PERM,
		.doit = smc_pnet_flush
	}
};

/* SMC_PNETID family definition */
698
static struct genl_family smc_pnet_nl_family __ro_after_init = {
699 700 701 702
	.hdrsize = 0,
	.name = SMCR_GENL_FAMILY_NAME,
	.version = SMCR_GENL_FAMILY_VERSION,
	.maxattr = SMC_PNETID_MAX,
703
	.policy = smc_pnet_policy,
704 705 706 707 708 709 710 711 712 713 714 715 716 717 718
	.netnsok = true,
	.module = THIS_MODULE,
	.ops = smc_pnet_ops,
	.n_ops =  ARRAY_SIZE(smc_pnet_ops)
};

static int smc_pnet_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);

	switch (event) {
	case NETDEV_REBOOT:
	case NETDEV_UNREGISTER:
		smc_pnet_remove_by_ndev(event_dev);
719
		return NOTIFY_OK;
720 721 722
	case NETDEV_REGISTER:
		smc_pnet_add_by_ndev(event_dev);
		return NOTIFY_OK;
723
	default:
724
		return NOTIFY_DONE;
725 726 727 728 729 730 731
	}
}

/* notifier block registered in smc_pnet_init() */
static struct notifier_block smc_netdev_notifier = {
	.notifier_call = smc_pnet_netdev_event,
};

732 733 734 735 736 737 738 739 740 741 742 743
/* init network namespace */
int smc_pnet_net_init(struct net *net)
{
	struct smc_net *sn = net_generic(net, smc_net_id);
	struct smc_pnettable *pnettable = &sn->pnettable;

	INIT_LIST_HEAD(&pnettable->pnetlist);
	rwlock_init(&pnettable->lock);

	return 0;
}

744 745 746 747 748 749 750 751 752 753 754 755 756
int __init smc_pnet_init(void)
{
	int rc;

	rc = genl_register_family(&smc_pnet_nl_family);
	if (rc)
		return rc;
	rc = register_netdevice_notifier(&smc_netdev_notifier);
	if (rc)
		genl_unregister_family(&smc_pnet_nl_family);
	return rc;
}

757 758 759 760 761 762 763
/* exit network namespace */
void smc_pnet_net_exit(struct net *net)
{
	/* flush pnet table */
	smc_pnet_remove_by_pnetid(net, NULL);
}

764 765 766 767 768 769
void smc_pnet_exit(void)
{
	unregister_netdevice_notifier(&smc_netdev_notifier);
	genl_unregister_family(&smc_pnet_nl_family);
}

U
Ursula Braun 已提交
770 771 772 773
/* Determine one base device for stacked net devices.
 * If the lower device level contains more than one devices
 * (for instance with bonding slaves), just the first device
 * is used to reach a base device.
774
 */
U
Ursula Braun 已提交
775
static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
776
{
U
Ursula Braun 已提交
777
	int i, nest_lvl;
778

U
Ursula Braun 已提交
779
	rtnl_lock();
780
	nest_lvl = ndev->lower_level;
U
Ursula Braun 已提交
781 782 783 784 785 786 787 788 789 790 791 792
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = netdev_lower_get_next(ndev, &lower);
	}
	rtnl_unlock();
	return ndev;
}

793
static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
H
Hans Wippel 已提交
794 795
					      u8 *pnetid)
{
796 797
	struct smc_pnettable *pnettable;
	struct net *net = dev_net(ndev);
H
Hans Wippel 已提交
798
	struct smc_pnetentry *pnetelem;
799
	struct smc_net *sn;
H
Hans Wippel 已提交
800 801
	int rc = -ENOENT;

802 803 804 805 806 807
	/* get pnettable for namespace */
	sn = net_generic(net, smc_net_id);
	pnettable = &sn->pnettable;

	read_lock(&pnettable->lock);
	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
808
		if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
H
Hans Wippel 已提交
809 810 811 812 813 814
			/* get pnetid of netdev device */
			memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
			rc = 0;
			break;
		}
	}
815
	read_unlock(&pnettable->lock);
H
Hans Wippel 已提交
816 817 818
	return rc;
}

819 820
/* find a roce device for the given pnetid */
static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
821 822
					  struct smc_init_info *ini,
					  struct smc_ib_device *known_dev)
823 824 825 826 827 828 829
{
	struct smc_ib_device *ibdev;
	int i;

	ini->ib_dev = NULL;
	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
830 831
		if (ibdev == known_dev)
			continue;
832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849
		for (i = 1; i <= SMC_MAX_PORTS; i++) {
			if (!rdma_is_port_valid(ibdev->ibdev, i))
				continue;
			if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) &&
			    smc_ib_port_active(ibdev, i) &&
			    !test_bit(i - 1, ibdev->ports_going_away) &&
			    !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
						  ini->ib_gid, NULL)) {
				ini->ib_dev = ibdev;
				ini->ib_port = i;
				goto out;
			}
		}
	}
out:
	spin_unlock(&smc_ib_devices.lock);
}

850 851 852 853 854 855 856 857
/* find alternate roce device with same pnet_id and vlan_id */
void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
			    struct smc_init_info *ini,
			    struct smc_ib_device *known_dev)
{
	_smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev);
}

858 859 860 861
/* if handshake network device belongs to a roce device, return its
 * IB device and port
 */
static void smc_pnet_find_rdma_dev(struct net_device *netdev,
862
				   struct smc_init_info *ini)
863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881
{
	struct smc_ib_device *ibdev;

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		struct net_device *ndev;
		int i;

		for (i = 1; i <= SMC_MAX_PORTS; i++) {
			if (!rdma_is_port_valid(ibdev->ibdev, i))
				continue;
			if (!ibdev->ibdev->ops.get_netdev)
				continue;
			ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i);
			if (!ndev)
				continue;
			dev_put(ndev);
			if (netdev == ndev &&
			    smc_ib_port_active(ibdev, i) &&
882
			    !test_bit(i - 1, ibdev->ports_going_away) &&
883 884 885 886
			    !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
						  ini->ib_gid, NULL)) {
				ini->ib_dev = ibdev;
				ini->ib_port = i;
887 888 889 890 891 892 893
				break;
			}
		}
	}
	spin_unlock(&smc_ib_devices.lock);
}

U
Ursula Braun 已提交
894
/* Determine the corresponding IB device port based on the hardware PNETID.
895 896
 * Searching stops at the first matching active IB device port with vlan_id
 * configured.
897 898
 * If nothing found, check pnetid table.
 * If nothing found, try to use handshake device
U
Ursula Braun 已提交
899 900
 */
static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
901
					 struct smc_init_info *ini)
U
Ursula Braun 已提交
902 903 904 905 906
{
	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];

	ndev = pnet_find_base_ndev(ndev);
	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
H
Hans Wippel 已提交
907
				   ndev_pnetid) &&
908
	    smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
909
		smc_pnet_find_rdma_dev(ndev, ini);
U
Ursula Braun 已提交
910
		return; /* pnetid could not be determined */
911
	}
912
	_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL);
U
Ursula Braun 已提交
913 914
}

915
static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
916
					struct smc_init_info *ini)
917 918 919 920 921 922
{
	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
	struct smcd_dev *ismdev;

	ndev = pnet_find_base_ndev(ndev);
	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
923 924
				   ndev_pnetid) &&
	    smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid))
925 926 927 928
		return; /* pnetid could not be determined */

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
929 930
		if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
		    !ismdev->going_away) {
931
			ini->ism_dev = ismdev;
932 933 934 935 936 937
			break;
		}
	}
	spin_unlock(&smcd_dev_list.lock);
}

U
Ursula Braun 已提交
938 939 940 941
/* PNET table analysis for a given sock:
 * determine ib_device and port belonging to used internal TCP socket
 * ethernet interface.
 */
942
void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
U
Ursula Braun 已提交
943 944 945
{
	struct dst_entry *dst = sk_dst_get(sk);

946 947
	ini->ib_dev = NULL;
	ini->ib_port = 0;
U
Ursula Braun 已提交
948 949 950 951 952
	if (!dst)
		goto out;
	if (!dst->dev)
		goto out_rel;

953
	smc_pnet_find_roce_by_pnetid(dst->dev, ini);
U
Ursula Braun 已提交
954

955 956
out_rel:
	dst_release(dst);
U
Ursula Braun 已提交
957 958
out:
	return;
959
}
960

961
void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
962 963 964
{
	struct dst_entry *dst = sk_dst_get(sk);

965
	ini->ism_dev = NULL;
966 967 968 969 970
	if (!dst)
		goto out;
	if (!dst->dev)
		goto out_rel;

971
	smc_pnet_find_ism_by_pnetid(dst->dev, ini);
972 973 974 975 976 977

out_rel:
	dst_release(dst);
out:
	return;
}
978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034

/* Lookup and apply a pnet table entry to the given ib device.
 * The table of the initial namespace is searched for an ib entry with the
 * name of the device and the given (1-based) port.
 * Returns 0 if such an entry exists, -ENOENT otherwise.
 */
int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
{
	struct smc_net *sn = net_generic(&init_net, smc_net_id);
	struct smc_pnettable *pnettable = &sn->pnettable;
	char *ib_name = smcibdev->ibdev->name;
	struct smc_pnetentry *entry;
	int rc = -ENOENT;

	read_lock(&pnettable->lock);
	list_for_each_entry(entry, &pnettable->pnetlist, list) {
		if (entry->type != SMC_PNET_IB)
			continue;
		if (strncmp(entry->ib_name, ib_name, IB_DEVICE_NAME_MAX))
			continue;
		if (entry->ib_port != ib_port)
			continue;
		smc_pnet_apply_ib(smcibdev, ib_port, entry->pnet_name);
		rc = 0;
		break;
	}
	read_unlock(&pnettable->lock);

	return rc;
}

/* Lookup and apply a pnet table entry to the given smcd device.
 * The table of the initial namespace is searched for an ib entry with the
 * name of the device.
 * Returns 0 if such an entry exists, -ENOENT otherwise.
 */
int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
{
	struct smc_net *sn = net_generic(&init_net, smc_net_id);
	struct smc_pnettable *pnettable = &sn->pnettable;
	const char *ib_name = dev_name(&smcddev->dev);
	struct smc_pnetentry *entry;
	int rc = -ENOENT;

	read_lock(&pnettable->lock);
	list_for_each_entry(entry, &pnettable->pnetlist, list) {
		if (entry->type != SMC_PNET_IB)
			continue;
		if (strncmp(entry->ib_name, ib_name, IB_DEVICE_NAME_MAX))
			continue;
		smc_pnet_apply_smcd(smcddev, entry->pnet_name);
		rc = 0;
		break;
	}
	read_unlock(&pnettable->lock);

	return rc;
}